1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool zeropivotdetected; 21 22 PetscFunctionBegin; 23 if (a->idiagvalid) { 24 if (values) *values = a->idiag; 25 PetscFunctionReturn(0); 26 } 27 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 28 diag_offset = a->diag; 29 if (!a->idiag) { 30 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 31 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 32 } 33 diag = a->idiag; 34 mdiag = a->idiag+bs2*mbs; 35 if (values) *values = a->idiag; 36 /* factor and invert each block */ 37 switch (bs) { 38 case 1: 39 for (i=0; i<mbs; i++) { 40 odiag = v + 1*diag_offset[i]; 41 diag[0] = odiag[0]; 42 mdiag[0] = odiag[0]; 43 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 44 diag += 1; 45 mdiag += 1; 46 } 47 break; 48 case 2: 49 for (i=0; i<mbs; i++) { 50 odiag = v + 4*diag_offset[i]; 51 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 52 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 53 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr); 54 diag += 4; 55 mdiag += 4; 56 } 57 break; 58 case 3: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 9*diag_offset[i]; 61 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 62 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 63 diag[8] = odiag[8]; 64 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 65 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 66 mdiag[8] = odiag[8]; 67 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,!A->erroriffailure,&zeropivotdetected);CHKERRQ(ierr); 68 if (zeropivotdetected) break; 69 diag += 9; 70 mdiag += 9; 71 } 72 break; 73 case 4: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 16*diag_offset[i]; 76 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 77 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 78 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr); 79 diag += 16; 80 mdiag += 16; 81 } 82 break; 83 case 5: 84 for (i=0; i<mbs; i++) { 85 odiag = v + 25*diag_offset[i]; 86 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 87 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 88 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr); 89 diag += 25; 90 mdiag += 25; 91 } 92 break; 93 case 6: 94 for (i=0; i<mbs; i++) { 95 odiag = v + 36*diag_offset[i]; 96 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 97 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr); 99 diag += 36; 100 mdiag += 36; 101 } 102 break; 103 case 7: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 49*diag_offset[i]; 106 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 108 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr); 109 diag += 49; 110 mdiag += 49; 111 } 112 break; 113 default: 114 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 115 for (i=0; i<mbs; i++) { 116 odiag = v + bs2*diag_offset[i]; 117 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 118 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 119 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr); 120 diag += bs2; 121 mdiag += bs2; 122 } 123 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 124 } 125 a->idiagvalid = PETSC_TRUE; 126 if (zeropivotdetected) { 127 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 } 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSOR_SeqBAIJ" 134 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 135 { 136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 137 PetscScalar *x,*work,*w,*workt,*t; 138 const MatScalar *v,*aa = a->a, *idiag; 139 const PetscScalar *b,*xb; 140 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 141 PetscErrorCode ierr; 142 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 143 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 144 145 PetscFunctionBegin; 146 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 893 /* 894 Special version for direct calls from Fortran (Used in PETSc-fun3d) 895 */ 896 #if defined(PETSC_HAVE_FORTRAN_CAPS) 897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 899 #define matsetvaluesblocked4_ matsetvaluesblocked4 900 #endif 901 902 #undef __FUNCT__ 903 #define __FUNCT__ "matsetvaluesblocked4_" 904 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 905 { 906 Mat A = *AA; 907 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 908 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 909 PetscInt *ai =a->i,*ailen=a->ilen; 910 PetscInt *aj =a->j,stepval,lastcol = -1; 911 const PetscScalar *value = v; 912 MatScalar *ap,*aa = a->a,*bap; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 953 } 954 if (N >= i) { 955 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 #undef __FUNCT__ 979 #define __FUNCT__ "MatSetValues4_" 980 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 981 { 982 Mat A = *AA; 983 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 984 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 985 PetscInt *ai=a->i,*ailen=a->ilen; 986 PetscInt *aj=a->j,brow,bcol; 987 PetscInt ridx,cidx,lastcol = -1; 988 MatScalar *ap,value,*aa=a->a,*bap; 989 990 PetscFunctionBegin; 991 for (k=0; k<m; k++) { /* loop over added rows */ 992 row = im[k]; brow = row/4; 993 rp = aj + ai[brow]; 994 ap = aa + 16*ai[brow]; 995 nrow = ailen[brow]; 996 low = 0; 997 high = nrow; 998 for (l=0; l<n; l++) { /* loop over added columns */ 999 col = in[l]; bcol = col/4; 1000 ridx = row % 4; cidx = col % 4; 1001 value = v[l + k*n]; 1002 if (col <= lastcol) low = 0; 1003 else high = nrow; 1004 lastcol = col; 1005 while (high-low > 7) { 1006 t = (low+high)/2; 1007 if (rp[t] > bcol) high = t; 1008 else low = t; 1009 } 1010 for (i=low; i<high; i++) { 1011 if (rp[i] > bcol) break; 1012 if (rp[i] == bcol) { 1013 bap = ap + 16*i + 4*cidx + ridx; 1014 *bap += value; 1015 goto noinsert1; 1016 } 1017 } 1018 N = nrow++ - 1; 1019 high++; /* added new column thus must search to one higher than before */ 1020 /* shift up all the later entries in this row */ 1021 for (ii=N; ii>=i; ii--) { 1022 rp[ii+1] = rp[ii]; 1023 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1024 } 1025 if (N>=i) { 1026 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1027 } 1028 rp[i] = bcol; 1029 ap[16*i + 4*cidx + ridx] = value; 1030 noinsert1:; 1031 low = i; 1032 } 1033 ailen[brow] = nrow; 1034 } 1035 PetscFunctionReturnVoid(); 1036 } 1037 1038 /* 1039 Checks for missing diagonals 1040 */ 1041 #undef __FUNCT__ 1042 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1043 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1044 { 1045 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1046 PetscErrorCode ierr; 1047 PetscInt *diag,*ii = a->i,i; 1048 1049 PetscFunctionBegin; 1050 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1051 *missing = PETSC_FALSE; 1052 if (A->rmap->n > 0 && !ii) { 1053 *missing = PETSC_TRUE; 1054 if (d) *d = 0; 1055 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1056 } else { 1057 diag = a->diag; 1058 for (i=0; i<a->mbs; i++) { 1059 if (diag[i] >= ii[i+1]) { 1060 *missing = PETSC_TRUE; 1061 if (d) *d = i; 1062 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1063 break; 1064 } 1065 } 1066 } 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1072 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1073 { 1074 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1075 PetscErrorCode ierr; 1076 PetscInt i,j,m = a->mbs; 1077 1078 PetscFunctionBegin; 1079 if (!a->diag) { 1080 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1081 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1082 a->free_diag = PETSC_TRUE; 1083 } 1084 for (i=0; i<m; i++) { 1085 a->diag[i] = a->i[i+1]; 1086 for (j=a->i[i]; j<a->i[i+1]; j++) { 1087 if (a->j[j] == i) { 1088 a->diag[i] = j; 1089 break; 1090 } 1091 } 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 1097 #undef __FUNCT__ 1098 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1099 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1100 { 1101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1102 PetscErrorCode ierr; 1103 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1104 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1105 1106 PetscFunctionBegin; 1107 *nn = n; 1108 if (!ia) PetscFunctionReturn(0); 1109 if (symmetric) { 1110 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1111 nz = tia[n]; 1112 } else { 1113 tia = a->i; tja = a->j; 1114 } 1115 1116 if (!blockcompressed && bs > 1) { 1117 (*nn) *= bs; 1118 /* malloc & create the natural set of indices */ 1119 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1120 if (n) { 1121 (*ia)[0] = 0; 1122 for (j=1; j<bs; j++) { 1123 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1124 } 1125 } 1126 1127 for (i=1; i<n; i++) { 1128 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1129 for (j=1; j<bs; j++) { 1130 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1131 } 1132 } 1133 if (n) { 1134 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1135 } 1136 1137 if (inja) { 1138 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1139 cnt = 0; 1140 for (i=0; i<n; i++) { 1141 for (j=0; j<bs; j++) { 1142 for (k=tia[i]; k<tia[i+1]; k++) { 1143 for (l=0; l<bs; l++) { 1144 (*ja)[cnt++] = bs*tja[k] + l; 1145 } 1146 } 1147 } 1148 } 1149 } 1150 1151 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1152 ierr = PetscFree(tia);CHKERRQ(ierr); 1153 ierr = PetscFree(tja);CHKERRQ(ierr); 1154 } 1155 } else if (oshift == 1) { 1156 if (symmetric) { 1157 nz = tia[A->rmap->n/bs]; 1158 /* add 1 to i and j indices */ 1159 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1160 *ia = tia; 1161 if (ja) { 1162 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1163 *ja = tja; 1164 } 1165 } else { 1166 nz = a->i[A->rmap->n/bs]; 1167 /* malloc space and add 1 to i and j indices */ 1168 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1169 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1170 if (ja) { 1171 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1172 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1173 } 1174 } 1175 } else { 1176 *ia = tia; 1177 if (ja) *ja = tja; 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1184 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1185 { 1186 PetscErrorCode ierr; 1187 1188 PetscFunctionBegin; 1189 if (!ia) PetscFunctionReturn(0); 1190 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1191 ierr = PetscFree(*ia);CHKERRQ(ierr); 1192 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1193 } 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1199 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1200 { 1201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1202 PetscErrorCode ierr; 1203 1204 PetscFunctionBegin; 1205 #if defined(PETSC_USE_LOG) 1206 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1207 #endif 1208 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1209 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1210 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1211 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1212 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1213 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1214 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1215 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1216 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1217 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1218 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1219 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1220 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1221 1222 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1223 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1224 ierr = PetscFree(A->data);CHKERRQ(ierr); 1225 1226 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1231 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1232 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1233 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1234 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1235 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1236 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1242 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1243 { 1244 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1245 PetscErrorCode ierr; 1246 1247 PetscFunctionBegin; 1248 switch (op) { 1249 case MAT_ROW_ORIENTED: 1250 a->roworiented = flg; 1251 break; 1252 case MAT_KEEP_NONZERO_PATTERN: 1253 a->keepnonzeropattern = flg; 1254 break; 1255 case MAT_NEW_NONZERO_LOCATIONS: 1256 a->nonew = (flg ? 0 : 1); 1257 break; 1258 case MAT_NEW_NONZERO_LOCATION_ERR: 1259 a->nonew = (flg ? -1 : 0); 1260 break; 1261 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1262 a->nonew = (flg ? -2 : 0); 1263 break; 1264 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1265 a->nounused = (flg ? -1 : 0); 1266 break; 1267 case MAT_NEW_DIAGONALS: 1268 case MAT_IGNORE_OFF_PROC_ENTRIES: 1269 case MAT_USE_HASH_TABLE: 1270 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1271 break; 1272 case MAT_SPD: 1273 case MAT_SYMMETRIC: 1274 case MAT_STRUCTURALLY_SYMMETRIC: 1275 case MAT_HERMITIAN: 1276 case MAT_SYMMETRY_ETERNAL: 1277 /* These options are handled directly by MatSetOption() */ 1278 break; 1279 default: 1280 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1281 } 1282 PetscFunctionReturn(0); 1283 } 1284 1285 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1288 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1289 { 1290 PetscErrorCode ierr; 1291 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1292 MatScalar *aa_i; 1293 PetscScalar *v_i; 1294 1295 PetscFunctionBegin; 1296 bs = A->rmap->bs; 1297 bs2 = bs*bs; 1298 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1299 1300 bn = row/bs; /* Block number */ 1301 bp = row % bs; /* Block Position */ 1302 M = ai[bn+1] - ai[bn]; 1303 *nz = bs*M; 1304 1305 if (v) { 1306 *v = 0; 1307 if (*nz) { 1308 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1309 for (i=0; i<M; i++) { /* for each block in the block row */ 1310 v_i = *v + i*bs; 1311 aa_i = aa + bs2*(ai[bn] + i); 1312 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1313 } 1314 } 1315 } 1316 1317 if (idx) { 1318 *idx = 0; 1319 if (*nz) { 1320 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1321 for (i=0; i<M; i++) { /* for each block in the block row */ 1322 idx_i = *idx + i*bs; 1323 itmp = bs*aj[ai[bn] + i]; 1324 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1325 } 1326 } 1327 } 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1333 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1334 { 1335 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1345 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1346 { 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1351 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1352 PetscFunctionReturn(0); 1353 } 1354 1355 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1356 1357 #undef __FUNCT__ 1358 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1359 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1360 { 1361 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1362 Mat C; 1363 PetscErrorCode ierr; 1364 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1365 PetscInt *rows,*cols,bs2=a->bs2; 1366 MatScalar *array; 1367 1368 PetscFunctionBegin; 1369 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1370 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1371 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1372 1373 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1374 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1375 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1376 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1377 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1378 ierr = PetscFree(col);CHKERRQ(ierr); 1379 } else { 1380 C = *B; 1381 } 1382 1383 array = a->a; 1384 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1385 for (i=0; i<mbs; i++) { 1386 cols[0] = i*bs; 1387 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1388 len = ai[i+1] - ai[i]; 1389 for (j=0; j<len; j++) { 1390 rows[0] = (*aj++)*bs; 1391 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1392 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1393 array += bs2; 1394 } 1395 } 1396 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1397 1398 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1399 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1400 1401 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1402 *B = C; 1403 } else { 1404 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1405 } 1406 PetscFunctionReturn(0); 1407 } 1408 1409 #undef __FUNCT__ 1410 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1411 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1412 { 1413 PetscErrorCode ierr; 1414 Mat Btrans; 1415 1416 PetscFunctionBegin; 1417 *f = PETSC_FALSE; 1418 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1419 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1420 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNCT__ 1425 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1426 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1427 { 1428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1429 PetscErrorCode ierr; 1430 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1431 int fd; 1432 PetscScalar *aa; 1433 FILE *file; 1434 1435 PetscFunctionBegin; 1436 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1437 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1438 col_lens[0] = MAT_FILE_CLASSID; 1439 1440 col_lens[1] = A->rmap->N; 1441 col_lens[2] = A->cmap->n; 1442 col_lens[3] = a->nz*bs2; 1443 1444 /* store lengths of each row and write (including header) to file */ 1445 count = 0; 1446 for (i=0; i<a->mbs; i++) { 1447 for (j=0; j<bs; j++) { 1448 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1449 } 1450 } 1451 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1452 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1453 1454 /* store column indices (zero start index) */ 1455 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1456 count = 0; 1457 for (i=0; i<a->mbs; i++) { 1458 for (j=0; j<bs; j++) { 1459 for (k=a->i[i]; k<a->i[i+1]; k++) { 1460 for (l=0; l<bs; l++) { 1461 jj[count++] = bs*a->j[k] + l; 1462 } 1463 } 1464 } 1465 } 1466 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1467 ierr = PetscFree(jj);CHKERRQ(ierr); 1468 1469 /* store nonzero values */ 1470 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1471 count = 0; 1472 for (i=0; i<a->mbs; i++) { 1473 for (j=0; j<bs; j++) { 1474 for (k=a->i[i]; k<a->i[i+1]; k++) { 1475 for (l=0; l<bs; l++) { 1476 aa[count++] = a->a[bs2*k + l*bs + j]; 1477 } 1478 } 1479 } 1480 } 1481 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1482 ierr = PetscFree(aa);CHKERRQ(ierr); 1483 1484 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1485 if (file) { 1486 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1487 } 1488 PetscFunctionReturn(0); 1489 } 1490 1491 #undef __FUNCT__ 1492 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1493 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1494 { 1495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1496 PetscErrorCode ierr; 1497 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1498 PetscViewerFormat format; 1499 1500 PetscFunctionBegin; 1501 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1502 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1503 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1504 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1505 const char *matname; 1506 Mat aij; 1507 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1508 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1509 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1510 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1511 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1512 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1513 PetscFunctionReturn(0); 1514 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1515 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1516 for (i=0; i<a->mbs; i++) { 1517 for (j=0; j<bs; j++) { 1518 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1519 for (k=a->i[i]; k<a->i[i+1]; k++) { 1520 for (l=0; l<bs; l++) { 1521 #if defined(PETSC_USE_COMPLEX) 1522 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1523 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1524 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1525 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1526 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1527 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1528 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1530 } 1531 #else 1532 if (a->a[bs2*k + l*bs + j] != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1534 } 1535 #endif 1536 } 1537 } 1538 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1539 } 1540 } 1541 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1542 } else { 1543 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1544 for (i=0; i<a->mbs; i++) { 1545 for (j=0; j<bs; j++) { 1546 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1547 for (k=a->i[i]; k<a->i[i+1]; k++) { 1548 for (l=0; l<bs; l++) { 1549 #if defined(PETSC_USE_COMPLEX) 1550 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1551 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1552 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1554 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1555 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1556 } else { 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1558 } 1559 #else 1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1561 #endif 1562 } 1563 } 1564 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1565 } 1566 } 1567 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1568 } 1569 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #include <petscdraw.h> 1574 #undef __FUNCT__ 1575 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1576 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1577 { 1578 Mat A = (Mat) Aa; 1579 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1580 PetscErrorCode ierr; 1581 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1582 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1583 MatScalar *aa; 1584 PetscViewer viewer; 1585 PetscViewerFormat format; 1586 1587 PetscFunctionBegin; 1588 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1589 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1590 1591 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1592 1593 /* loop over matrix elements drawing boxes */ 1594 1595 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1596 color = PETSC_DRAW_BLUE; 1597 for (i=0,row=0; i<mbs; i++,row+=bs) { 1598 for (j=a->i[i]; j<a->i[i+1]; j++) { 1599 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1600 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1601 aa = a->a + j*bs2; 1602 for (k=0; k<bs; k++) { 1603 for (l=0; l<bs; l++) { 1604 if (PetscRealPart(*aa++) >= 0.) continue; 1605 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1606 } 1607 } 1608 } 1609 } 1610 color = PETSC_DRAW_CYAN; 1611 for (i=0,row=0; i<mbs; i++,row+=bs) { 1612 for (j=a->i[i]; j<a->i[i+1]; j++) { 1613 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1614 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1615 aa = a->a + j*bs2; 1616 for (k=0; k<bs; k++) { 1617 for (l=0; l<bs; l++) { 1618 if (PetscRealPart(*aa++) != 0.) continue; 1619 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 } 1624 color = PETSC_DRAW_RED; 1625 for (i=0,row=0; i<mbs; i++,row+=bs) { 1626 for (j=a->i[i]; j<a->i[i+1]; j++) { 1627 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1628 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1629 aa = a->a + j*bs2; 1630 for (k=0; k<bs; k++) { 1631 for (l=0; l<bs; l++) { 1632 if (PetscRealPart(*aa++) <= 0.) continue; 1633 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1634 } 1635 } 1636 } 1637 } 1638 } else { 1639 /* use contour shading to indicate magnitude of values */ 1640 /* first determine max of all nonzero values */ 1641 PetscDraw popup; 1642 PetscReal scale,maxv = 0.0; 1643 1644 for (i=0; i<a->nz*a->bs2; i++) { 1645 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1646 } 1647 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1648 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1649 if (popup) { 1650 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1651 } 1652 for (i=0,row=0; i<mbs; i++,row+=bs) { 1653 for (j=a->i[i]; j<a->i[i+1]; j++) { 1654 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1655 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1656 aa = a->a + j*bs2; 1657 for (k=0; k<bs; k++) { 1658 for (l=0; l<bs; l++) { 1659 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1660 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1661 } 1662 } 1663 } 1664 } 1665 } 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1671 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1672 { 1673 PetscErrorCode ierr; 1674 PetscReal xl,yl,xr,yr,w,h; 1675 PetscDraw draw; 1676 PetscBool isnull; 1677 1678 PetscFunctionBegin; 1679 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1680 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1681 1682 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1683 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1684 xr += w; yr += h; xl = -w; yl = -h; 1685 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1686 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1687 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1688 PetscFunctionReturn(0); 1689 } 1690 1691 #undef __FUNCT__ 1692 #define __FUNCT__ "MatView_SeqBAIJ" 1693 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1694 { 1695 PetscErrorCode ierr; 1696 PetscBool iascii,isbinary,isdraw; 1697 1698 PetscFunctionBegin; 1699 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1700 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1701 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1702 if (iascii) { 1703 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1704 } else if (isbinary) { 1705 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1706 } else if (isdraw) { 1707 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1708 } else { 1709 Mat B; 1710 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1711 ierr = MatView(B,viewer);CHKERRQ(ierr); 1712 ierr = MatDestroy(&B);CHKERRQ(ierr); 1713 } 1714 PetscFunctionReturn(0); 1715 } 1716 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1720 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1721 { 1722 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1723 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1724 PetscInt *ai = a->i,*ailen = a->ilen; 1725 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1726 MatScalar *ap,*aa = a->a; 1727 1728 PetscFunctionBegin; 1729 for (k=0; k<m; k++) { /* loop over rows */ 1730 row = im[k]; brow = row/bs; 1731 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1732 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1733 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1734 nrow = ailen[brow]; 1735 for (l=0; l<n; l++) { /* loop over columns */ 1736 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1737 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1738 col = in[l]; 1739 bcol = col/bs; 1740 cidx = col%bs; 1741 ridx = row%bs; 1742 high = nrow; 1743 low = 0; /* assume unsorted */ 1744 while (high-low > 5) { 1745 t = (low+high)/2; 1746 if (rp[t] > bcol) high = t; 1747 else low = t; 1748 } 1749 for (i=low; i<high; i++) { 1750 if (rp[i] > bcol) break; 1751 if (rp[i] == bcol) { 1752 *v++ = ap[bs2*i+bs*cidx+ridx]; 1753 goto finished; 1754 } 1755 } 1756 *v++ = 0.0; 1757 finished:; 1758 } 1759 } 1760 PetscFunctionReturn(0); 1761 } 1762 1763 #undef __FUNCT__ 1764 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1765 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1766 { 1767 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1768 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1769 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1770 PetscErrorCode ierr; 1771 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1772 PetscBool roworiented=a->roworiented; 1773 const PetscScalar *value = v; 1774 MatScalar *ap,*aa = a->a,*bap; 1775 1776 PetscFunctionBegin; 1777 if (roworiented) { 1778 stepval = (n-1)*bs; 1779 } else { 1780 stepval = (m-1)*bs; 1781 } 1782 for (k=0; k<m; k++) { /* loop over added rows */ 1783 row = im[k]; 1784 if (row < 0) continue; 1785 #if defined(PETSC_USE_DEBUG) 1786 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1787 #endif 1788 rp = aj + ai[row]; 1789 ap = aa + bs2*ai[row]; 1790 rmax = imax[row]; 1791 nrow = ailen[row]; 1792 low = 0; 1793 high = nrow; 1794 for (l=0; l<n; l++) { /* loop over added columns */ 1795 if (in[l] < 0) continue; 1796 #if defined(PETSC_USE_DEBUG) 1797 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1798 #endif 1799 col = in[l]; 1800 if (roworiented) { 1801 value = v + (k*(stepval+bs) + l)*bs; 1802 } else { 1803 value = v + (l*(stepval+bs) + k)*bs; 1804 } 1805 if (col <= lastcol) low = 0; 1806 else high = nrow; 1807 lastcol = col; 1808 while (high-low > 7) { 1809 t = (low+high)/2; 1810 if (rp[t] > col) high = t; 1811 else low = t; 1812 } 1813 for (i=low; i<high; i++) { 1814 if (rp[i] > col) break; 1815 if (rp[i] == col) { 1816 bap = ap + bs2*i; 1817 if (roworiented) { 1818 if (is == ADD_VALUES) { 1819 for (ii=0; ii<bs; ii++,value+=stepval) { 1820 for (jj=ii; jj<bs2; jj+=bs) { 1821 bap[jj] += *value++; 1822 } 1823 } 1824 } else { 1825 for (ii=0; ii<bs; ii++,value+=stepval) { 1826 for (jj=ii; jj<bs2; jj+=bs) { 1827 bap[jj] = *value++; 1828 } 1829 } 1830 } 1831 } else { 1832 if (is == ADD_VALUES) { 1833 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1834 for (jj=0; jj<bs; jj++) { 1835 bap[jj] += value[jj]; 1836 } 1837 bap += bs; 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1841 for (jj=0; jj<bs; jj++) { 1842 bap[jj] = value[jj]; 1843 } 1844 bap += bs; 1845 } 1846 } 1847 } 1848 goto noinsert2; 1849 } 1850 } 1851 if (nonew == 1) goto noinsert2; 1852 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1853 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1854 N = nrow++ - 1; high++; 1855 /* shift up all the later entries in this row */ 1856 for (ii=N; ii>=i; ii--) { 1857 rp[ii+1] = rp[ii]; 1858 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1859 } 1860 if (N >= i) { 1861 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1862 } 1863 rp[i] = col; 1864 bap = ap + bs2*i; 1865 if (roworiented) { 1866 for (ii=0; ii<bs; ii++,value+=stepval) { 1867 for (jj=ii; jj<bs2; jj+=bs) { 1868 bap[jj] = *value++; 1869 } 1870 } 1871 } else { 1872 for (ii=0; ii<bs; ii++,value+=stepval) { 1873 for (jj=0; jj<bs; jj++) { 1874 *bap++ = *value++; 1875 } 1876 } 1877 } 1878 noinsert2:; 1879 low = i; 1880 } 1881 ailen[row] = nrow; 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1888 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1889 { 1890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1891 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1892 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1893 PetscErrorCode ierr; 1894 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1895 MatScalar *aa = a->a,*ap; 1896 PetscReal ratio=0.6; 1897 1898 PetscFunctionBegin; 1899 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1900 1901 if (m) rmax = ailen[0]; 1902 for (i=1; i<mbs; i++) { 1903 /* move each row back by the amount of empty slots (fshift) before it*/ 1904 fshift += imax[i-1] - ailen[i-1]; 1905 rmax = PetscMax(rmax,ailen[i]); 1906 if (fshift) { 1907 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1908 N = ailen[i]; 1909 for (j=0; j<N; j++) { 1910 ip[j-fshift] = ip[j]; 1911 1912 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1913 } 1914 } 1915 ai[i] = ai[i-1] + ailen[i-1]; 1916 } 1917 if (mbs) { 1918 fshift += imax[mbs-1] - ailen[mbs-1]; 1919 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1920 } 1921 1922 /* reset ilen and imax for each row */ 1923 a->nonzerorowcnt = 0; 1924 for (i=0; i<mbs; i++) { 1925 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1926 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1927 } 1928 a->nz = ai[mbs]; 1929 1930 /* diagonals may have moved, so kill the diagonal pointers */ 1931 a->idiagvalid = PETSC_FALSE; 1932 if (fshift && a->diag) { 1933 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1934 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1935 a->diag = 0; 1936 } 1937 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1938 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1939 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1940 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1941 1942 A->info.mallocs += a->reallocs; 1943 a->reallocs = 0; 1944 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1945 a->rmax = rmax; 1946 1947 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 /* 1952 This function returns an array of flags which indicate the locations of contiguous 1953 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1954 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1955 Assume: sizes should be long enough to hold all the values. 1956 */ 1957 #undef __FUNCT__ 1958 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1959 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1960 { 1961 PetscInt i,j,k,row; 1962 PetscBool flg; 1963 1964 PetscFunctionBegin; 1965 for (i=0,j=0; i<n; j++) { 1966 row = idx[i]; 1967 if (row%bs!=0) { /* Not the begining of a block */ 1968 sizes[j] = 1; 1969 i++; 1970 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1971 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1972 i++; 1973 } else { /* Begining of the block, so check if the complete block exists */ 1974 flg = PETSC_TRUE; 1975 for (k=1; k<bs; k++) { 1976 if (row+k != idx[i+k]) { /* break in the block */ 1977 flg = PETSC_FALSE; 1978 break; 1979 } 1980 } 1981 if (flg) { /* No break in the bs */ 1982 sizes[j] = bs; 1983 i += bs; 1984 } else { 1985 sizes[j] = 1; 1986 i++; 1987 } 1988 } 1989 } 1990 *bs_max = j; 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNCT__ 1995 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1996 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 1997 { 1998 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1999 PetscErrorCode ierr; 2000 PetscInt i,j,k,count,*rows; 2001 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2002 PetscScalar zero = 0.0; 2003 MatScalar *aa; 2004 const PetscScalar *xx; 2005 PetscScalar *bb; 2006 2007 PetscFunctionBegin; 2008 /* fix right hand side if needed */ 2009 if (x && b) { 2010 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2011 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2012 for (i=0; i<is_n; i++) { 2013 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2014 } 2015 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2016 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2017 } 2018 2019 /* Make a copy of the IS and sort it */ 2020 /* allocate memory for rows,sizes */ 2021 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2022 2023 /* copy IS values to rows, and sort them */ 2024 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2025 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2026 2027 if (baij->keepnonzeropattern) { 2028 for (i=0; i<is_n; i++) sizes[i] = 1; 2029 bs_max = is_n; 2030 } else { 2031 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2032 A->nonzerostate++; 2033 } 2034 2035 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2036 row = rows[j]; 2037 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2038 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2039 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2040 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2041 if (diag != (PetscScalar)0.0) { 2042 if (baij->ilen[row/bs] > 0) { 2043 baij->ilen[row/bs] = 1; 2044 baij->j[baij->i[row/bs]] = row/bs; 2045 2046 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2047 } 2048 /* Now insert all the diagonal values for this bs */ 2049 for (k=0; k<bs; k++) { 2050 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2051 } 2052 } else { /* (diag == 0.0) */ 2053 baij->ilen[row/bs] = 0; 2054 } /* end (diag == 0.0) */ 2055 } else { /* (sizes[i] != bs) */ 2056 #if defined(PETSC_USE_DEBUG) 2057 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2058 #endif 2059 for (k=0; k<count; k++) { 2060 aa[0] = zero; 2061 aa += bs; 2062 } 2063 if (diag != (PetscScalar)0.0) { 2064 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2065 } 2066 } 2067 } 2068 2069 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2070 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2076 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2077 { 2078 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2079 PetscErrorCode ierr; 2080 PetscInt i,j,k,count; 2081 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2082 PetscScalar zero = 0.0; 2083 MatScalar *aa; 2084 const PetscScalar *xx; 2085 PetscScalar *bb; 2086 PetscBool *zeroed,vecs = PETSC_FALSE; 2087 2088 PetscFunctionBegin; 2089 /* fix right hand side if needed */ 2090 if (x && b) { 2091 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2092 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2093 vecs = PETSC_TRUE; 2094 } 2095 2096 /* zero the columns */ 2097 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2098 for (i=0; i<is_n; i++) { 2099 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2100 zeroed[is_idx[i]] = PETSC_TRUE; 2101 } 2102 for (i=0; i<A->rmap->N; i++) { 2103 if (!zeroed[i]) { 2104 row = i/bs; 2105 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2106 for (k=0; k<bs; k++) { 2107 col = bs*baij->j[j] + k; 2108 if (zeroed[col]) { 2109 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2110 if (vecs) bb[i] -= aa[0]*xx[col]; 2111 aa[0] = 0.0; 2112 } 2113 } 2114 } 2115 } else if (vecs) bb[i] = diag*xx[i]; 2116 } 2117 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2118 if (vecs) { 2119 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2120 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2121 } 2122 2123 /* zero the rows */ 2124 for (i=0; i<is_n; i++) { 2125 row = is_idx[i]; 2126 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2127 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2128 for (k=0; k<count; k++) { 2129 aa[0] = zero; 2130 aa += bs; 2131 } 2132 if (diag != (PetscScalar)0.0) { 2133 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2134 } 2135 } 2136 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2137 PetscFunctionReturn(0); 2138 } 2139 2140 #undef __FUNCT__ 2141 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2142 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2143 { 2144 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2145 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2146 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2147 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2148 PetscErrorCode ierr; 2149 PetscInt ridx,cidx,bs2=a->bs2; 2150 PetscBool roworiented=a->roworiented; 2151 MatScalar *ap,value,*aa=a->a,*bap; 2152 2153 PetscFunctionBegin; 2154 for (k=0; k<m; k++) { /* loop over added rows */ 2155 row = im[k]; 2156 brow = row/bs; 2157 if (row < 0) continue; 2158 #if defined(PETSC_USE_DEBUG) 2159 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2160 #endif 2161 rp = aj + ai[brow]; 2162 ap = aa + bs2*ai[brow]; 2163 rmax = imax[brow]; 2164 nrow = ailen[brow]; 2165 low = 0; 2166 high = nrow; 2167 for (l=0; l<n; l++) { /* loop over added columns */ 2168 if (in[l] < 0) continue; 2169 #if defined(PETSC_USE_DEBUG) 2170 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2171 #endif 2172 col = in[l]; bcol = col/bs; 2173 ridx = row % bs; cidx = col % bs; 2174 if (roworiented) { 2175 value = v[l + k*n]; 2176 } else { 2177 value = v[k + l*m]; 2178 } 2179 if (col <= lastcol) low = 0; else high = nrow; 2180 lastcol = col; 2181 while (high-low > 7) { 2182 t = (low+high)/2; 2183 if (rp[t] > bcol) high = t; 2184 else low = t; 2185 } 2186 for (i=low; i<high; i++) { 2187 if (rp[i] > bcol) break; 2188 if (rp[i] == bcol) { 2189 bap = ap + bs2*i + bs*cidx + ridx; 2190 if (is == ADD_VALUES) *bap += value; 2191 else *bap = value; 2192 goto noinsert1; 2193 } 2194 } 2195 if (nonew == 1) goto noinsert1; 2196 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2197 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2198 N = nrow++ - 1; high++; 2199 /* shift up all the later entries in this row */ 2200 for (ii=N; ii>=i; ii--) { 2201 rp[ii+1] = rp[ii]; 2202 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2203 } 2204 if (N>=i) { 2205 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2206 } 2207 rp[i] = bcol; 2208 ap[bs2*i + bs*cidx + ridx] = value; 2209 a->nz++; 2210 A->nonzerostate++; 2211 noinsert1:; 2212 low = i; 2213 } 2214 ailen[brow] = nrow; 2215 } 2216 PetscFunctionReturn(0); 2217 } 2218 2219 #undef __FUNCT__ 2220 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2221 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2222 { 2223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2224 Mat outA; 2225 PetscErrorCode ierr; 2226 PetscBool row_identity,col_identity; 2227 2228 PetscFunctionBegin; 2229 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2230 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2231 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2232 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2233 2234 outA = inA; 2235 inA->factortype = MAT_FACTOR_LU; 2236 2237 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2238 2239 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2240 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2241 a->row = row; 2242 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2243 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2244 a->col = col; 2245 2246 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2247 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2248 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2249 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2250 2251 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2252 if (!a->solve_work) { 2253 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2254 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2255 } 2256 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2257 PetscFunctionReturn(0); 2258 } 2259 2260 #undef __FUNCT__ 2261 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2262 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2263 { 2264 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2265 PetscInt i,nz,mbs; 2266 2267 PetscFunctionBegin; 2268 nz = baij->maxnz; 2269 mbs = baij->mbs; 2270 for (i=0; i<nz; i++) { 2271 baij->j[i] = indices[i]; 2272 } 2273 baij->nz = nz; 2274 for (i=0; i<mbs; i++) { 2275 baij->ilen[i] = baij->imax[i]; 2276 } 2277 PetscFunctionReturn(0); 2278 } 2279 2280 #undef __FUNCT__ 2281 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2282 /*@ 2283 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2284 in the matrix. 2285 2286 Input Parameters: 2287 + mat - the SeqBAIJ matrix 2288 - indices - the column indices 2289 2290 Level: advanced 2291 2292 Notes: 2293 This can be called if you have precomputed the nonzero structure of the 2294 matrix and want to provide it to the matrix object to improve the performance 2295 of the MatSetValues() operation. 2296 2297 You MUST have set the correct numbers of nonzeros per row in the call to 2298 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2299 2300 MUST be called before any calls to MatSetValues(); 2301 2302 @*/ 2303 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2304 { 2305 PetscErrorCode ierr; 2306 2307 PetscFunctionBegin; 2308 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2309 PetscValidPointer(indices,2); 2310 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2311 PetscFunctionReturn(0); 2312 } 2313 2314 #undef __FUNCT__ 2315 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2316 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2317 { 2318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2319 PetscErrorCode ierr; 2320 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2321 PetscReal atmp; 2322 PetscScalar *x,zero = 0.0; 2323 MatScalar *aa; 2324 PetscInt ncols,brow,krow,kcol; 2325 2326 PetscFunctionBegin; 2327 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2328 bs = A->rmap->bs; 2329 aa = a->a; 2330 ai = a->i; 2331 aj = a->j; 2332 mbs = a->mbs; 2333 2334 ierr = VecSet(v,zero);CHKERRQ(ierr); 2335 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2336 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2337 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2338 for (i=0; i<mbs; i++) { 2339 ncols = ai[1] - ai[0]; ai++; 2340 brow = bs*i; 2341 for (j=0; j<ncols; j++) { 2342 for (kcol=0; kcol<bs; kcol++) { 2343 for (krow=0; krow<bs; krow++) { 2344 atmp = PetscAbsScalar(*aa);aa++; 2345 row = brow + krow; /* row index */ 2346 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2347 } 2348 } 2349 aj++; 2350 } 2351 } 2352 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2353 PetscFunctionReturn(0); 2354 } 2355 2356 #undef __FUNCT__ 2357 #define __FUNCT__ "MatCopy_SeqBAIJ" 2358 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2359 { 2360 PetscErrorCode ierr; 2361 2362 PetscFunctionBegin; 2363 /* If the two matrices have the same copy implementation, use fast copy. */ 2364 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2366 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2367 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2368 2369 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2370 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2371 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2372 } else { 2373 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2374 } 2375 PetscFunctionReturn(0); 2376 } 2377 2378 #undef __FUNCT__ 2379 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2380 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2381 { 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 #undef __FUNCT__ 2390 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2391 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2392 { 2393 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2394 2395 PetscFunctionBegin; 2396 *array = a->a; 2397 PetscFunctionReturn(0); 2398 } 2399 2400 #undef __FUNCT__ 2401 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2402 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2403 { 2404 PetscFunctionBegin; 2405 PetscFunctionReturn(0); 2406 } 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2410 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2411 { 2412 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2413 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2414 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2415 PetscErrorCode ierr; 2416 2417 PetscFunctionBegin; 2418 /* Set the number of nonzeros in the new matrix */ 2419 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 #undef __FUNCT__ 2424 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2425 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2426 { 2427 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2428 PetscErrorCode ierr; 2429 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2430 PetscBLASInt one=1; 2431 2432 PetscFunctionBegin; 2433 if (str == SAME_NONZERO_PATTERN) { 2434 PetscScalar alpha = a; 2435 PetscBLASInt bnz; 2436 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2437 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2438 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2439 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2440 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2441 } else { 2442 Mat B; 2443 PetscInt *nnz; 2444 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2445 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2446 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2447 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2448 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2449 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2450 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2451 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2452 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2453 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2454 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2455 ierr = PetscFree(nnz);CHKERRQ(ierr); 2456 } 2457 PetscFunctionReturn(0); 2458 } 2459 2460 #undef __FUNCT__ 2461 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2462 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2463 { 2464 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2465 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2466 MatScalar *aa = a->a; 2467 2468 PetscFunctionBegin; 2469 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2475 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2476 { 2477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2478 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2479 MatScalar *aa = a->a; 2480 2481 PetscFunctionBegin; 2482 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2488 /* 2489 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2490 */ 2491 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2492 { 2493 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2494 PetscErrorCode ierr; 2495 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2496 PetscInt nz = a->i[m],row,*jj,mr,col; 2497 2498 PetscFunctionBegin; 2499 *nn = n; 2500 if (!ia) PetscFunctionReturn(0); 2501 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2502 else { 2503 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2504 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2505 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2506 jj = a->j; 2507 for (i=0; i<nz; i++) { 2508 collengths[jj[i]]++; 2509 } 2510 cia[0] = oshift; 2511 for (i=0; i<n; i++) { 2512 cia[i+1] = cia[i] + collengths[i]; 2513 } 2514 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2515 jj = a->j; 2516 for (row=0; row<m; row++) { 2517 mr = a->i[row+1] - a->i[row]; 2518 for (i=0; i<mr; i++) { 2519 col = *jj++; 2520 2521 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2522 } 2523 } 2524 ierr = PetscFree(collengths);CHKERRQ(ierr); 2525 *ia = cia; *ja = cja; 2526 } 2527 PetscFunctionReturn(0); 2528 } 2529 2530 #undef __FUNCT__ 2531 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2532 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2533 { 2534 PetscErrorCode ierr; 2535 2536 PetscFunctionBegin; 2537 if (!ia) PetscFunctionReturn(0); 2538 ierr = PetscFree(*ia);CHKERRQ(ierr); 2539 ierr = PetscFree(*ja);CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 /* 2544 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2545 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2546 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2547 */ 2548 #undef __FUNCT__ 2549 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2550 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2551 { 2552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2553 PetscErrorCode ierr; 2554 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2555 PetscInt nz = a->i[m],row,*jj,mr,col; 2556 PetscInt *cspidx; 2557 2558 PetscFunctionBegin; 2559 *nn = n; 2560 if (!ia) PetscFunctionReturn(0); 2561 2562 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2563 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2564 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2565 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2566 jj = a->j; 2567 for (i=0; i<nz; i++) { 2568 collengths[jj[i]]++; 2569 } 2570 cia[0] = oshift; 2571 for (i=0; i<n; i++) { 2572 cia[i+1] = cia[i] + collengths[i]; 2573 } 2574 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2575 jj = a->j; 2576 for (row=0; row<m; row++) { 2577 mr = a->i[row+1] - a->i[row]; 2578 for (i=0; i<mr; i++) { 2579 col = *jj++; 2580 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2581 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2582 } 2583 } 2584 ierr = PetscFree(collengths);CHKERRQ(ierr); 2585 *ia = cia; *ja = cja; 2586 *spidx = cspidx; 2587 PetscFunctionReturn(0); 2588 } 2589 2590 #undef __FUNCT__ 2591 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2592 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2593 { 2594 PetscErrorCode ierr; 2595 2596 PetscFunctionBegin; 2597 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2598 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 #undef __FUNCT__ 2603 #define __FUNCT__ "MatShift_SeqBAIJ" 2604 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2605 { 2606 PetscErrorCode ierr; 2607 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2608 2609 PetscFunctionBegin; 2610 if (!Y->preallocated || !aij->nz) { 2611 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2612 } 2613 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2614 PetscFunctionReturn(0); 2615 } 2616 2617 /* -------------------------------------------------------------------*/ 2618 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2619 MatGetRow_SeqBAIJ, 2620 MatRestoreRow_SeqBAIJ, 2621 MatMult_SeqBAIJ_N, 2622 /* 4*/ MatMultAdd_SeqBAIJ_N, 2623 MatMultTranspose_SeqBAIJ, 2624 MatMultTransposeAdd_SeqBAIJ, 2625 0, 2626 0, 2627 0, 2628 /* 10*/ 0, 2629 MatLUFactor_SeqBAIJ, 2630 0, 2631 0, 2632 MatTranspose_SeqBAIJ, 2633 /* 15*/ MatGetInfo_SeqBAIJ, 2634 MatEqual_SeqBAIJ, 2635 MatGetDiagonal_SeqBAIJ, 2636 MatDiagonalScale_SeqBAIJ, 2637 MatNorm_SeqBAIJ, 2638 /* 20*/ 0, 2639 MatAssemblyEnd_SeqBAIJ, 2640 MatSetOption_SeqBAIJ, 2641 MatZeroEntries_SeqBAIJ, 2642 /* 24*/ MatZeroRows_SeqBAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /* 29*/ MatSetUp_SeqBAIJ, 2648 0, 2649 0, 2650 0, 2651 0, 2652 /* 34*/ MatDuplicate_SeqBAIJ, 2653 0, 2654 0, 2655 MatILUFactor_SeqBAIJ, 2656 0, 2657 /* 39*/ MatAXPY_SeqBAIJ, 2658 MatGetSubMatrices_SeqBAIJ, 2659 MatIncreaseOverlap_SeqBAIJ, 2660 MatGetValues_SeqBAIJ, 2661 MatCopy_SeqBAIJ, 2662 /* 44*/ 0, 2663 MatScale_SeqBAIJ, 2664 MatShift_SeqBAIJ, 2665 0, 2666 MatZeroRowsColumns_SeqBAIJ, 2667 /* 49*/ 0, 2668 MatGetRowIJ_SeqBAIJ, 2669 MatRestoreRowIJ_SeqBAIJ, 2670 MatGetColumnIJ_SeqBAIJ, 2671 MatRestoreColumnIJ_SeqBAIJ, 2672 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2673 0, 2674 0, 2675 0, 2676 MatSetValuesBlocked_SeqBAIJ, 2677 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2678 MatDestroy_SeqBAIJ, 2679 MatView_SeqBAIJ, 2680 0, 2681 0, 2682 /* 64*/ 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2688 0, 2689 MatConvert_Basic, 2690 0, 2691 0, 2692 /* 74*/ 0, 2693 MatFDColoringApply_BAIJ, 2694 0, 2695 0, 2696 0, 2697 /* 79*/ 0, 2698 0, 2699 0, 2700 0, 2701 MatLoad_SeqBAIJ, 2702 /* 84*/ 0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /* 89*/ 0, 2708 0, 2709 0, 2710 0, 2711 0, 2712 /* 94*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /* 99*/ 0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /*104*/ 0, 2723 MatRealPart_SeqBAIJ, 2724 MatImaginaryPart_SeqBAIJ, 2725 0, 2726 0, 2727 /*109*/ 0, 2728 0, 2729 0, 2730 0, 2731 MatMissingDiagonal_SeqBAIJ, 2732 /*114*/ 0, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /*119*/ 0, 2738 0, 2739 MatMultHermitianTranspose_SeqBAIJ, 2740 MatMultHermitianTransposeAdd_SeqBAIJ, 2741 0, 2742 /*124*/ 0, 2743 0, 2744 MatInvertBlockDiagonal_SeqBAIJ, 2745 0, 2746 0, 2747 /*129*/ 0, 2748 0, 2749 0, 2750 0, 2751 0, 2752 /*134*/ 0, 2753 0, 2754 0, 2755 0, 2756 0, 2757 /*139*/ 0, 2758 0, 2759 0, 2760 MatFDColoringSetUp_SeqXAIJ, 2761 0, 2762 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2763 }; 2764 2765 #undef __FUNCT__ 2766 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2767 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2768 { 2769 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2770 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2771 PetscErrorCode ierr; 2772 2773 PetscFunctionBegin; 2774 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2775 2776 /* allocate space for values if not already there */ 2777 if (!aij->saved_values) { 2778 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2779 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2780 } 2781 2782 /* copy values over */ 2783 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2784 PetscFunctionReturn(0); 2785 } 2786 2787 #undef __FUNCT__ 2788 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2789 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2790 { 2791 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2792 PetscErrorCode ierr; 2793 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2794 2795 PetscFunctionBegin; 2796 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2797 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2798 2799 /* copy values over */ 2800 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2801 PetscFunctionReturn(0); 2802 } 2803 2804 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2805 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2806 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2809 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2810 { 2811 Mat_SeqBAIJ *b; 2812 PetscErrorCode ierr; 2813 PetscInt i,mbs,nbs,bs2; 2814 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2815 2816 PetscFunctionBegin; 2817 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2818 if (nz == MAT_SKIP_ALLOCATION) { 2819 skipallocation = PETSC_TRUE; 2820 nz = 0; 2821 } 2822 2823 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2824 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2825 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2826 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2827 2828 B->preallocated = PETSC_TRUE; 2829 2830 mbs = B->rmap->n/bs; 2831 nbs = B->cmap->n/bs; 2832 bs2 = bs*bs; 2833 2834 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2835 2836 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2837 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2838 if (nnz) { 2839 for (i=0; i<mbs; i++) { 2840 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2841 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2842 } 2843 } 2844 2845 b = (Mat_SeqBAIJ*)B->data; 2846 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2847 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2848 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2849 2850 if (!flg) { 2851 switch (bs) { 2852 case 1: 2853 B->ops->mult = MatMult_SeqBAIJ_1; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2855 break; 2856 case 2: 2857 B->ops->mult = MatMult_SeqBAIJ_2; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2859 break; 2860 case 3: 2861 B->ops->mult = MatMult_SeqBAIJ_3; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2863 break; 2864 case 4: 2865 B->ops->mult = MatMult_SeqBAIJ_4; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2867 break; 2868 case 5: 2869 B->ops->mult = MatMult_SeqBAIJ_5; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2871 break; 2872 case 6: 2873 B->ops->mult = MatMult_SeqBAIJ_6; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2875 break; 2876 case 7: 2877 B->ops->mult = MatMult_SeqBAIJ_7; 2878 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2879 break; 2880 case 15: 2881 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2882 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2883 break; 2884 default: 2885 B->ops->mult = MatMult_SeqBAIJ_N; 2886 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2887 break; 2888 } 2889 } 2890 B->ops->sor = MatSOR_SeqBAIJ; 2891 b->mbs = mbs; 2892 b->nbs = nbs; 2893 if (!skipallocation) { 2894 if (!b->imax) { 2895 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2896 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2897 2898 b->free_imax_ilen = PETSC_TRUE; 2899 } 2900 /* b->ilen will count nonzeros in each block row so far. */ 2901 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2902 if (!nnz) { 2903 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2904 else if (nz < 0) nz = 1; 2905 for (i=0; i<mbs; i++) b->imax[i] = nz; 2906 nz = nz*mbs; 2907 } else { 2908 nz = 0; 2909 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2910 } 2911 2912 /* allocate the matrix space */ 2913 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2914 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2915 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2916 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2917 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2918 2919 b->singlemalloc = PETSC_TRUE; 2920 b->i[0] = 0; 2921 for (i=1; i<mbs+1; i++) { 2922 b->i[i] = b->i[i-1] + b->imax[i-1]; 2923 } 2924 b->free_a = PETSC_TRUE; 2925 b->free_ij = PETSC_TRUE; 2926 } else { 2927 b->free_a = PETSC_FALSE; 2928 b->free_ij = PETSC_FALSE; 2929 } 2930 2931 b->bs2 = bs2; 2932 b->mbs = mbs; 2933 b->nz = 0; 2934 b->maxnz = nz; 2935 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2936 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2937 PetscFunctionReturn(0); 2938 } 2939 2940 #undef __FUNCT__ 2941 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2942 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2943 { 2944 PetscInt i,m,nz,nz_max=0,*nnz; 2945 PetscScalar *values=0; 2946 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2947 PetscErrorCode ierr; 2948 2949 PetscFunctionBegin; 2950 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2951 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2952 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2953 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2954 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2955 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2956 m = B->rmap->n/bs; 2957 2958 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2959 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2960 for (i=0; i<m; i++) { 2961 nz = ii[i+1]- ii[i]; 2962 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2963 nz_max = PetscMax(nz_max, nz); 2964 nnz[i] = nz; 2965 } 2966 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2967 ierr = PetscFree(nnz);CHKERRQ(ierr); 2968 2969 values = (PetscScalar*)V; 2970 if (!values) { 2971 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2972 } 2973 for (i=0; i<m; i++) { 2974 PetscInt ncols = ii[i+1] - ii[i]; 2975 const PetscInt *icols = jj + ii[i]; 2976 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2977 if (!roworiented) { 2978 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2979 } else { 2980 PetscInt j; 2981 for (j=0; j<ncols; j++) { 2982 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2983 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2984 } 2985 } 2986 } 2987 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2988 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2989 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2990 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2991 PetscFunctionReturn(0); 2992 } 2993 2994 /*MC 2995 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2996 block sparse compressed row format. 2997 2998 Options Database Keys: 2999 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3000 3001 Level: beginner 3002 3003 .seealso: MatCreateSeqBAIJ() 3004 M*/ 3005 3006 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3007 3008 #undef __FUNCT__ 3009 #define __FUNCT__ "MatCreate_SeqBAIJ" 3010 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3011 { 3012 PetscErrorCode ierr; 3013 PetscMPIInt size; 3014 Mat_SeqBAIJ *b; 3015 3016 PetscFunctionBegin; 3017 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3018 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3019 3020 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3021 B->data = (void*)b; 3022 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3023 3024 b->row = 0; 3025 b->col = 0; 3026 b->icol = 0; 3027 b->reallocs = 0; 3028 b->saved_values = 0; 3029 3030 b->roworiented = PETSC_TRUE; 3031 b->nonew = 0; 3032 b->diag = 0; 3033 B->spptr = 0; 3034 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3035 b->keepnonzeropattern = PETSC_FALSE; 3036 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3048 PetscFunctionReturn(0); 3049 } 3050 3051 #undef __FUNCT__ 3052 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3053 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3054 { 3055 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3056 PetscErrorCode ierr; 3057 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3058 3059 PetscFunctionBegin; 3060 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3061 3062 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3063 c->imax = a->imax; 3064 c->ilen = a->ilen; 3065 c->free_imax_ilen = PETSC_FALSE; 3066 } else { 3067 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3068 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3069 for (i=0; i<mbs; i++) { 3070 c->imax[i] = a->imax[i]; 3071 c->ilen[i] = a->ilen[i]; 3072 } 3073 c->free_imax_ilen = PETSC_TRUE; 3074 } 3075 3076 /* allocate the matrix space */ 3077 if (mallocmatspace) { 3078 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3079 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3080 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3081 3082 c->i = a->i; 3083 c->j = a->j; 3084 c->singlemalloc = PETSC_FALSE; 3085 c->free_a = PETSC_TRUE; 3086 c->free_ij = PETSC_FALSE; 3087 c->parent = A; 3088 C->preallocated = PETSC_TRUE; 3089 C->assembled = PETSC_TRUE; 3090 3091 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3092 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3093 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3094 } else { 3095 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3096 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3097 3098 c->singlemalloc = PETSC_TRUE; 3099 c->free_a = PETSC_TRUE; 3100 c->free_ij = PETSC_TRUE; 3101 3102 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3103 if (mbs > 0) { 3104 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3105 if (cpvalues == MAT_COPY_VALUES) { 3106 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3107 } else { 3108 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3109 } 3110 } 3111 C->preallocated = PETSC_TRUE; 3112 C->assembled = PETSC_TRUE; 3113 } 3114 } 3115 3116 c->roworiented = a->roworiented; 3117 c->nonew = a->nonew; 3118 3119 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3120 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3121 3122 c->bs2 = a->bs2; 3123 c->mbs = a->mbs; 3124 c->nbs = a->nbs; 3125 3126 if (a->diag) { 3127 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3128 c->diag = a->diag; 3129 c->free_diag = PETSC_FALSE; 3130 } else { 3131 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3132 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3133 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3134 c->free_diag = PETSC_TRUE; 3135 } 3136 } else c->diag = 0; 3137 3138 c->nz = a->nz; 3139 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3140 c->solve_work = NULL; 3141 c->mult_work = NULL; 3142 c->sor_workt = NULL; 3143 c->sor_work = NULL; 3144 3145 c->compressedrow.use = a->compressedrow.use; 3146 c->compressedrow.nrows = a->compressedrow.nrows; 3147 if (a->compressedrow.use) { 3148 i = a->compressedrow.nrows; 3149 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3150 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3151 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3152 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3153 } else { 3154 c->compressedrow.use = PETSC_FALSE; 3155 c->compressedrow.i = NULL; 3156 c->compressedrow.rindex = NULL; 3157 } 3158 C->nonzerostate = A->nonzerostate; 3159 3160 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3161 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 3165 #undef __FUNCT__ 3166 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3167 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3168 { 3169 PetscErrorCode ierr; 3170 3171 PetscFunctionBegin; 3172 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3173 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3174 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3175 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 3179 #undef __FUNCT__ 3180 #define __FUNCT__ "MatLoad_SeqBAIJ" 3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3182 { 3183 Mat_SeqBAIJ *a; 3184 PetscErrorCode ierr; 3185 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3186 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3187 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3188 PetscInt *masked,nmask,tmp,bs2,ishift; 3189 PetscMPIInt size; 3190 int fd; 3191 PetscScalar *aa; 3192 MPI_Comm comm; 3193 3194 PetscFunctionBegin; 3195 /* force binary viewer to load .info file if it has not yet done so */ 3196 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3197 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3198 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3199 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3200 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3201 if (bs < 0) bs = 1; 3202 bs2 = bs*bs; 3203 3204 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3205 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3206 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3207 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3208 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3209 M = header[1]; N = header[2]; nz = header[3]; 3210 3211 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3212 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3213 3214 /* 3215 This code adds extra rows to make sure the number of rows is 3216 divisible by the blocksize 3217 */ 3218 mbs = M/bs; 3219 extra_rows = bs - M + bs*(mbs); 3220 if (extra_rows == bs) extra_rows = 0; 3221 else mbs++; 3222 if (extra_rows) { 3223 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3224 } 3225 3226 /* Set global sizes if not already set */ 3227 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3228 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3229 } else { /* Check if the matrix global sizes are correct */ 3230 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3231 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3232 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3233 } 3234 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3235 } 3236 3237 /* read in row lengths */ 3238 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3239 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3240 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3241 3242 /* read in column indices */ 3243 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3244 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3245 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3246 3247 /* loop over row lengths determining block row lengths */ 3248 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3249 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3250 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3251 rowcount = 0; 3252 nzcount = 0; 3253 for (i=0; i<mbs; i++) { 3254 nmask = 0; 3255 for (j=0; j<bs; j++) { 3256 kmax = rowlengths[rowcount]; 3257 for (k=0; k<kmax; k++) { 3258 tmp = jj[nzcount++]/bs; 3259 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3260 } 3261 rowcount++; 3262 } 3263 browlengths[i] += nmask; 3264 /* zero out the mask elements we set */ 3265 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3266 } 3267 3268 /* Do preallocation */ 3269 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3270 a = (Mat_SeqBAIJ*)newmat->data; 3271 3272 /* set matrix "i" values */ 3273 a->i[0] = 0; 3274 for (i=1; i<= mbs; i++) { 3275 a->i[i] = a->i[i-1] + browlengths[i-1]; 3276 a->ilen[i-1] = browlengths[i-1]; 3277 } 3278 a->nz = 0; 3279 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3280 3281 /* read in nonzero values */ 3282 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3283 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3284 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3285 3286 /* set "a" and "j" values into matrix */ 3287 nzcount = 0; jcount = 0; 3288 for (i=0; i<mbs; i++) { 3289 nzcountb = nzcount; 3290 nmask = 0; 3291 for (j=0; j<bs; j++) { 3292 kmax = rowlengths[i*bs+j]; 3293 for (k=0; k<kmax; k++) { 3294 tmp = jj[nzcount++]/bs; 3295 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3296 } 3297 } 3298 /* sort the masked values */ 3299 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3300 3301 /* set "j" values into matrix */ 3302 maskcount = 1; 3303 for (j=0; j<nmask; j++) { 3304 a->j[jcount++] = masked[j]; 3305 mask[masked[j]] = maskcount++; 3306 } 3307 /* set "a" values into matrix */ 3308 ishift = bs2*a->i[i]; 3309 for (j=0; j<bs; j++) { 3310 kmax = rowlengths[i*bs+j]; 3311 for (k=0; k<kmax; k++) { 3312 tmp = jj[nzcountb]/bs; 3313 block = mask[tmp] - 1; 3314 point = jj[nzcountb] - bs*tmp; 3315 idx = ishift + bs2*block + j + bs*point; 3316 a->a[idx] = (MatScalar)aa[nzcountb++]; 3317 } 3318 } 3319 /* zero out the mask elements we set */ 3320 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3321 } 3322 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3323 3324 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3325 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3326 ierr = PetscFree(aa);CHKERRQ(ierr); 3327 ierr = PetscFree(jj);CHKERRQ(ierr); 3328 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3329 3330 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3331 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3332 PetscFunctionReturn(0); 3333 } 3334 3335 #undef __FUNCT__ 3336 #define __FUNCT__ "MatCreateSeqBAIJ" 3337 /*@C 3338 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3339 compressed row) format. For good matrix assembly performance the 3340 user should preallocate the matrix storage by setting the parameter nz 3341 (or the array nnz). By setting these parameters accurately, performance 3342 during matrix assembly can be increased by more than a factor of 50. 3343 3344 Collective on MPI_Comm 3345 3346 Input Parameters: 3347 + comm - MPI communicator, set to PETSC_COMM_SELF 3348 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3349 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3350 . m - number of rows 3351 . n - number of columns 3352 . nz - number of nonzero blocks per block row (same for all rows) 3353 - nnz - array containing the number of nonzero blocks in the various block rows 3354 (possibly different for each block row) or NULL 3355 3356 Output Parameter: 3357 . A - the matrix 3358 3359 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3360 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3361 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3362 3363 Options Database Keys: 3364 . -mat_no_unroll - uses code that does not unroll the loops in the 3365 block calculations (much slower) 3366 . -mat_block_size - size of the blocks to use 3367 3368 Level: intermediate 3369 3370 Notes: 3371 The number of rows and columns must be divisible by blocksize. 3372 3373 If the nnz parameter is given then the nz parameter is ignored 3374 3375 A nonzero block is any block that as 1 or more nonzeros in it 3376 3377 The block AIJ format is fully compatible with standard Fortran 77 3378 storage. That is, the stored row and column indices can begin at 3379 either one (as in Fortran) or zero. See the users' manual for details. 3380 3381 Specify the preallocated storage with either nz or nnz (not both). 3382 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3383 allocation. See Users-Manual: ch_mat for details. 3384 matrices. 3385 3386 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3387 @*/ 3388 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3389 { 3390 PetscErrorCode ierr; 3391 3392 PetscFunctionBegin; 3393 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3394 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3395 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3396 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3397 PetscFunctionReturn(0); 3398 } 3399 3400 #undef __FUNCT__ 3401 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3402 /*@C 3403 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3404 per row in the matrix. For good matrix assembly performance the 3405 user should preallocate the matrix storage by setting the parameter nz 3406 (or the array nnz). By setting these parameters accurately, performance 3407 during matrix assembly can be increased by more than a factor of 50. 3408 3409 Collective on MPI_Comm 3410 3411 Input Parameters: 3412 + B - the matrix 3413 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3414 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3415 . nz - number of block nonzeros per block row (same for all rows) 3416 - nnz - array containing the number of block nonzeros in the various block rows 3417 (possibly different for each block row) or NULL 3418 3419 Options Database Keys: 3420 . -mat_no_unroll - uses code that does not unroll the loops in the 3421 block calculations (much slower) 3422 . -mat_block_size - size of the blocks to use 3423 3424 Level: intermediate 3425 3426 Notes: 3427 If the nnz parameter is given then the nz parameter is ignored 3428 3429 You can call MatGetInfo() to get information on how effective the preallocation was; 3430 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3431 You can also run with the option -info and look for messages with the string 3432 malloc in them to see if additional memory allocation was needed. 3433 3434 The block AIJ format is fully compatible with standard Fortran 77 3435 storage. That is, the stored row and column indices can begin at 3436 either one (as in Fortran) or zero. See the users' manual for details. 3437 3438 Specify the preallocated storage with either nz or nnz (not both). 3439 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3440 allocation. See Users-Manual: ch_mat for details. 3441 3442 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3443 @*/ 3444 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3445 { 3446 PetscErrorCode ierr; 3447 3448 PetscFunctionBegin; 3449 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3450 PetscValidType(B,1); 3451 PetscValidLogicalCollectiveInt(B,bs,2); 3452 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3453 PetscFunctionReturn(0); 3454 } 3455 3456 #undef __FUNCT__ 3457 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3458 /*@C 3459 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3460 (the default sequential PETSc format). 3461 3462 Collective on MPI_Comm 3463 3464 Input Parameters: 3465 + B - the matrix 3466 . i - the indices into j for the start of each local row (starts with zero) 3467 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3468 - v - optional values in the matrix 3469 3470 Level: developer 3471 3472 Notes: 3473 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3474 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3475 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3476 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3477 block column and the second index is over columns within a block. 3478 3479 .keywords: matrix, aij, compressed row, sparse 3480 3481 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3482 @*/ 3483 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3484 { 3485 PetscErrorCode ierr; 3486 3487 PetscFunctionBegin; 3488 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3489 PetscValidType(B,1); 3490 PetscValidLogicalCollectiveInt(B,bs,2); 3491 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 3495 3496 #undef __FUNCT__ 3497 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3498 /*@ 3499 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3500 3501 Collective on MPI_Comm 3502 3503 Input Parameters: 3504 + comm - must be an MPI communicator of size 1 3505 . bs - size of block 3506 . m - number of rows 3507 . n - number of columns 3508 . i - row indices 3509 . j - column indices 3510 - a - matrix values 3511 3512 Output Parameter: 3513 . mat - the matrix 3514 3515 Level: advanced 3516 3517 Notes: 3518 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3519 once the matrix is destroyed 3520 3521 You cannot set new nonzero locations into this matrix, that will generate an error. 3522 3523 The i and j indices are 0 based 3524 3525 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3526 3527 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3528 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3529 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3530 with column-major ordering within blocks. 3531 3532 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3533 3534 @*/ 3535 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3536 { 3537 PetscErrorCode ierr; 3538 PetscInt ii; 3539 Mat_SeqBAIJ *baij; 3540 3541 PetscFunctionBegin; 3542 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3543 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3544 3545 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3546 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3547 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3548 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3549 baij = (Mat_SeqBAIJ*)(*mat)->data; 3550 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3551 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3552 3553 baij->i = i; 3554 baij->j = j; 3555 baij->a = a; 3556 3557 baij->singlemalloc = PETSC_FALSE; 3558 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3559 baij->free_a = PETSC_FALSE; 3560 baij->free_ij = PETSC_FALSE; 3561 3562 for (ii=0; ii<m; ii++) { 3563 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3564 #if defined(PETSC_USE_DEBUG) 3565 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3566 #endif 3567 } 3568 #if defined(PETSC_USE_DEBUG) 3569 for (ii=0; ii<baij->i[m]; ii++) { 3570 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3571 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3572 } 3573 #endif 3574 3575 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3576 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3577 PetscFunctionReturn(0); 3578 } 3579 3580 #undef __FUNCT__ 3581 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3582 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3583 { 3584 PetscErrorCode ierr; 3585 3586 PetscFunctionBegin; 3587 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3588 PetscFunctionReturn(0); 3589 } 3590