1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 21 { 22 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 23 PetscErrorCode ierr; 24 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 25 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 26 PetscReal shift = 0.0; 27 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 28 29 PetscFunctionBegin; 30 allowzeropivot = PetscNot(A->erroriffailure); 31 32 if (a->idiagvalid) { 33 if (values) *values = a->idiag; 34 PetscFunctionReturn(0); 35 } 36 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 37 diag_offset = a->diag; 38 if (!a->idiag) { 39 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 40 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 41 } 42 diag = a->idiag; 43 if (values) *values = a->idiag; 44 /* factor and invert each block */ 45 switch (bs) { 46 case 1: 47 for (i=0; i<mbs; i++) { 48 odiag = v + 1*diag_offset[i]; 49 diag[0] = odiag[0]; 50 51 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 52 if (allowzeropivot) { 53 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 54 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 55 A->factorerror_zeropivot_row = i; 56 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 57 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 58 } 59 60 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 61 diag += 1; 62 } 63 break; 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 69 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 70 diag += 4; 71 } 72 break; 73 case 3: 74 for (i=0; i<mbs; i++) { 75 odiag = v + 9*diag_offset[i]; 76 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 77 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 78 diag[8] = odiag[8]; 79 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 80 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 81 diag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 88 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 89 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 90 diag += 16; 91 } 92 break; 93 case 5: 94 for (i=0; i<mbs; i++) { 95 odiag = v + 25*diag_offset[i]; 96 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 97 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 98 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 99 diag += 25; 100 } 101 break; 102 case 6: 103 for (i=0; i<mbs; i++) { 104 odiag = v + 36*diag_offset[i]; 105 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108 diag += 36; 109 } 110 break; 111 case 7: 112 for (i=0; i<mbs; i++) { 113 odiag = v + 49*diag_offset[i]; 114 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 115 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 116 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 117 diag += 49; 118 } 119 break; 120 default: 121 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 122 for (i=0; i<mbs; i++) { 123 odiag = v + bs2*diag_offset[i]; 124 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 125 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 126 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 diag += bs2; 128 } 129 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 130 } 131 a->idiagvalid = PETSC_TRUE; 132 PetscFunctionReturn(0); 133 } 134 135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 PetscScalar *x,*work,*w,*workt,*t; 139 const MatScalar *v,*aa = a->a, *idiag; 140 const PetscScalar *b,*xb; 141 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 142 PetscErrorCode ierr; 143 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 144 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 145 146 PetscFunctionBegin; 147 its = its*lits; 148 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 149 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 150 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 151 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 152 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 153 154 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 155 156 if (!m) PetscFunctionReturn(0); 157 diag = a->diag; 158 idiag = a->idiag; 159 k = PetscMax(A->rmap->n,A->cmap->n); 160 if (!a->mult_work) { 161 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 162 } 163 if (!a->sor_workt) { 164 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 165 } 166 if (!a->sor_work) { 167 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 168 } 169 work = a->mult_work; 170 t = a->sor_workt; 171 w = a->sor_work; 172 173 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 174 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 175 176 if (flag & SOR_ZERO_INITIAL_GUESS) { 177 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 178 switch (bs) { 179 case 1: 180 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 181 t[0] = b[0]; 182 i2 = 1; 183 idiag += 1; 184 for (i=1; i<m; i++) { 185 v = aa + ai[i]; 186 vi = aj + ai[i]; 187 nz = diag[i] - ai[i]; 188 s[0] = b[i2]; 189 for (j=0; j<nz; j++) { 190 xw[0] = x[vi[j]]; 191 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 192 } 193 t[i2] = s[0]; 194 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 195 x[i2] = xw[0]; 196 idiag += 1; 197 i2 += 1; 198 } 199 break; 200 case 2: 201 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 202 t[0] = b[0]; t[1] = b[1]; 203 i2 = 2; 204 idiag += 4; 205 for (i=1; i<m; i++) { 206 v = aa + 4*ai[i]; 207 vi = aj + ai[i]; 208 nz = diag[i] - ai[i]; 209 s[0] = b[i2]; s[1] = b[i2+1]; 210 for (j=0; j<nz; j++) { 211 idx = 2*vi[j]; 212 it = 4*j; 213 xw[0] = x[idx]; xw[1] = x[1+idx]; 214 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 215 } 216 t[i2] = s[0]; t[i2+1] = s[1]; 217 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 218 x[i2] = xw[0]; x[i2+1] = xw[1]; 219 idiag += 4; 220 i2 += 2; 221 } 222 break; 223 case 3: 224 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 225 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 226 i2 = 3; 227 idiag += 9; 228 for (i=1; i<m; i++) { 229 v = aa + 9*ai[i]; 230 vi = aj + ai[i]; 231 nz = diag[i] - ai[i]; 232 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 233 while (nz--) { 234 idx = 3*(*vi++); 235 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 236 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 237 v += 9; 238 } 239 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 240 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 241 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 242 idiag += 9; 243 i2 += 3; 244 } 245 break; 246 case 4: 247 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 248 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 249 i2 = 4; 250 idiag += 16; 251 for (i=1; i<m; i++) { 252 v = aa + 16*ai[i]; 253 vi = aj + ai[i]; 254 nz = diag[i] - ai[i]; 255 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 256 while (nz--) { 257 idx = 4*(*vi++); 258 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 259 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 260 v += 16; 261 } 262 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 263 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 264 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 265 idiag += 16; 266 i2 += 4; 267 } 268 break; 269 case 5: 270 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 271 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 272 i2 = 5; 273 idiag += 25; 274 for (i=1; i<m; i++) { 275 v = aa + 25*ai[i]; 276 vi = aj + ai[i]; 277 nz = diag[i] - ai[i]; 278 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 279 while (nz--) { 280 idx = 5*(*vi++); 281 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 282 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 283 v += 25; 284 } 285 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 286 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 287 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 288 idiag += 25; 289 i2 += 5; 290 } 291 break; 292 case 6: 293 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 294 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 295 i2 = 6; 296 idiag += 36; 297 for (i=1; i<m; i++) { 298 v = aa + 36*ai[i]; 299 vi = aj + ai[i]; 300 nz = diag[i] - ai[i]; 301 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 302 while (nz--) { 303 idx = 6*(*vi++); 304 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 305 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 306 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 307 v += 36; 308 } 309 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 310 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 311 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 312 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 313 idiag += 36; 314 i2 += 6; 315 } 316 break; 317 case 7: 318 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 319 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 320 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 321 i2 = 7; 322 idiag += 49; 323 for (i=1; i<m; i++) { 324 v = aa + 49*ai[i]; 325 vi = aj + ai[i]; 326 nz = diag[i] - ai[i]; 327 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 328 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 329 while (nz--) { 330 idx = 7*(*vi++); 331 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 332 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 333 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 334 v += 49; 335 } 336 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 337 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 338 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 339 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 340 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 341 idiag += 49; 342 i2 += 7; 343 } 344 break; 345 default: 346 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 347 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 348 i2 = bs; 349 idiag += bs2; 350 for (i=1; i<m; i++) { 351 v = aa + bs2*ai[i]; 352 vi = aj + ai[i]; 353 nz = diag[i] - ai[i]; 354 355 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 356 /* copy all rows of x that are needed into contiguous space */ 357 workt = work; 358 for (j=0; j<nz; j++) { 359 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 360 workt += bs; 361 } 362 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 363 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 364 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 365 366 idiag += bs2; 367 i2 += bs; 368 } 369 break; 370 } 371 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 372 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 373 xb = t; 374 } 375 else xb = b; 376 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 377 idiag = a->idiag+bs2*(a->mbs-1); 378 i2 = bs * (m-1); 379 switch (bs) { 380 case 1: 381 s[0] = xb[i2]; 382 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 383 x[i2] = xw[0]; 384 i2 -= 1; 385 for (i=m-2; i>=0; i--) { 386 v = aa + (diag[i]+1); 387 vi = aj + diag[i] + 1; 388 nz = ai[i+1] - diag[i] - 1; 389 s[0] = xb[i2]; 390 for (j=0; j<nz; j++) { 391 xw[0] = x[vi[j]]; 392 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 393 } 394 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 395 x[i2] = xw[0]; 396 idiag -= 1; 397 i2 -= 1; 398 } 399 break; 400 case 2: 401 s[0] = xb[i2]; s[1] = xb[i2+1]; 402 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 403 x[i2] = xw[0]; x[i2+1] = xw[1]; 404 i2 -= 2; 405 idiag -= 4; 406 for (i=m-2; i>=0; i--) { 407 v = aa + 4*(diag[i] + 1); 408 vi = aj + diag[i] + 1; 409 nz = ai[i+1] - diag[i] - 1; 410 s[0] = xb[i2]; s[1] = xb[i2+1]; 411 for (j=0; j<nz; j++) { 412 idx = 2*vi[j]; 413 it = 4*j; 414 xw[0] = x[idx]; xw[1] = x[1+idx]; 415 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 416 } 417 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 418 x[i2] = xw[0]; x[i2+1] = xw[1]; 419 idiag -= 4; 420 i2 -= 2; 421 } 422 break; 423 case 3: 424 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 425 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 426 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 427 i2 -= 3; 428 idiag -= 9; 429 for (i=m-2; i>=0; i--) { 430 v = aa + 9*(diag[i]+1); 431 vi = aj + diag[i] + 1; 432 nz = ai[i+1] - diag[i] - 1; 433 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 434 while (nz--) { 435 idx = 3*(*vi++); 436 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 437 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 438 v += 9; 439 } 440 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 441 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 442 idiag -= 9; 443 i2 -= 3; 444 } 445 break; 446 case 4: 447 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 448 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 449 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 450 i2 -= 4; 451 idiag -= 16; 452 for (i=m-2; i>=0; i--) { 453 v = aa + 16*(diag[i]+1); 454 vi = aj + diag[i] + 1; 455 nz = ai[i+1] - diag[i] - 1; 456 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 457 while (nz--) { 458 idx = 4*(*vi++); 459 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 460 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 461 v += 16; 462 } 463 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 464 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 465 idiag -= 16; 466 i2 -= 4; 467 } 468 break; 469 case 5: 470 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 471 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 472 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 473 i2 -= 5; 474 idiag -= 25; 475 for (i=m-2; i>=0; i--) { 476 v = aa + 25*(diag[i]+1); 477 vi = aj + diag[i] + 1; 478 nz = ai[i+1] - diag[i] - 1; 479 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 480 while (nz--) { 481 idx = 5*(*vi++); 482 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 483 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 484 v += 25; 485 } 486 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 487 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 488 idiag -= 25; 489 i2 -= 5; 490 } 491 break; 492 case 6: 493 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 494 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 495 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 496 i2 -= 6; 497 idiag -= 36; 498 for (i=m-2; i>=0; i--) { 499 v = aa + 36*(diag[i]+1); 500 vi = aj + diag[i] + 1; 501 nz = ai[i+1] - diag[i] - 1; 502 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 503 while (nz--) { 504 idx = 6*(*vi++); 505 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 506 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 507 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 508 v += 36; 509 } 510 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 511 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 512 idiag -= 36; 513 i2 -= 6; 514 } 515 break; 516 case 7: 517 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 518 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 519 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 520 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 521 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 522 i2 -= 7; 523 idiag -= 49; 524 for (i=m-2; i>=0; i--) { 525 v = aa + 49*(diag[i]+1); 526 vi = aj + diag[i] + 1; 527 nz = ai[i+1] - diag[i] - 1; 528 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 529 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 530 while (nz--) { 531 idx = 7*(*vi++); 532 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 533 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 534 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 535 v += 49; 536 } 537 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 538 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 539 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 540 idiag -= 49; 541 i2 -= 7; 542 } 543 break; 544 default: 545 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 546 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 547 i2 -= bs; 548 idiag -= bs2; 549 for (i=m-2; i>=0; i--) { 550 v = aa + bs2*(diag[i]+1); 551 vi = aj + diag[i] + 1; 552 nz = ai[i+1] - diag[i] - 1; 553 554 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 555 /* copy all rows of x that are needed into contiguous space */ 556 workt = work; 557 for (j=0; j<nz; j++) { 558 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 559 workt += bs; 560 } 561 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 562 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 563 564 idiag -= bs2; 565 i2 -= bs; 566 } 567 break; 568 } 569 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 570 } 571 its--; 572 } 573 while (its--) { 574 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 575 idiag = a->idiag; 576 i2 = 0; 577 switch (bs) { 578 case 1: 579 for (i=0; i<m; i++) { 580 v = aa + ai[i]; 581 vi = aj + ai[i]; 582 nz = ai[i+1] - ai[i]; 583 s[0] = b[i2]; 584 for (j=0; j<nz; j++) { 585 xw[0] = x[vi[j]]; 586 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 587 } 588 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 589 x[i2] += xw[0]; 590 idiag += 1; 591 i2 += 1; 592 } 593 break; 594 case 2: 595 for (i=0; i<m; i++) { 596 v = aa + 4*ai[i]; 597 vi = aj + ai[i]; 598 nz = ai[i+1] - ai[i]; 599 s[0] = b[i2]; s[1] = b[i2+1]; 600 for (j=0; j<nz; j++) { 601 idx = 2*vi[j]; 602 it = 4*j; 603 xw[0] = x[idx]; xw[1] = x[1+idx]; 604 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 605 } 606 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 607 x[i2] += xw[0]; x[i2+1] += xw[1]; 608 idiag += 4; 609 i2 += 2; 610 } 611 break; 612 case 3: 613 for (i=0; i<m; i++) { 614 v = aa + 9*ai[i]; 615 vi = aj + ai[i]; 616 nz = ai[i+1] - ai[i]; 617 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 618 while (nz--) { 619 idx = 3*(*vi++); 620 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 621 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 622 v += 9; 623 } 624 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 625 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 626 idiag += 9; 627 i2 += 3; 628 } 629 break; 630 case 4: 631 for (i=0; i<m; i++) { 632 v = aa + 16*ai[i]; 633 vi = aj + ai[i]; 634 nz = ai[i+1] - ai[i]; 635 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 636 while (nz--) { 637 idx = 4*(*vi++); 638 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 639 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 640 v += 16; 641 } 642 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 643 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 644 idiag += 16; 645 i2 += 4; 646 } 647 break; 648 case 5: 649 for (i=0; i<m; i++) { 650 v = aa + 25*ai[i]; 651 vi = aj + ai[i]; 652 nz = ai[i+1] - ai[i]; 653 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 654 while (nz--) { 655 idx = 5*(*vi++); 656 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 657 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 658 v += 25; 659 } 660 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 661 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 662 idiag += 25; 663 i2 += 5; 664 } 665 break; 666 case 6: 667 for (i=0; i<m; i++) { 668 v = aa + 36*ai[i]; 669 vi = aj + ai[i]; 670 nz = ai[i+1] - ai[i]; 671 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 672 while (nz--) { 673 idx = 6*(*vi++); 674 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 675 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 677 v += 36; 678 } 679 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 681 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 682 idiag += 36; 683 i2 += 6; 684 } 685 break; 686 case 7: 687 for (i=0; i<m; i++) { 688 v = aa + 49*ai[i]; 689 vi = aj + ai[i]; 690 nz = ai[i+1] - ai[i]; 691 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 692 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 693 while (nz--) { 694 idx = 7*(*vi++); 695 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 696 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 697 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 698 v += 49; 699 } 700 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 701 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 702 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 703 idiag += 49; 704 i2 += 7; 705 } 706 break; 707 default: 708 for (i=0; i<m; i++) { 709 v = aa + bs2*ai[i]; 710 vi = aj + ai[i]; 711 nz = ai[i+1] - ai[i]; 712 713 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 714 /* copy all rows of x that are needed into contiguous space */ 715 workt = work; 716 for (j=0; j<nz; j++) { 717 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 718 workt += bs; 719 } 720 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 721 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 722 723 idiag += bs2; 724 i2 += bs; 725 } 726 break; 727 } 728 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 729 } 730 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 731 idiag = a->idiag+bs2*(a->mbs-1); 732 i2 = bs * (m-1); 733 switch (bs) { 734 case 1: 735 for (i=m-1; i>=0; i--) { 736 v = aa + ai[i]; 737 vi = aj + ai[i]; 738 nz = ai[i+1] - ai[i]; 739 s[0] = b[i2]; 740 for (j=0; j<nz; j++) { 741 xw[0] = x[vi[j]]; 742 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 743 } 744 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 745 x[i2] += xw[0]; 746 idiag -= 1; 747 i2 -= 1; 748 } 749 break; 750 case 2: 751 for (i=m-1; i>=0; i--) { 752 v = aa + 4*ai[i]; 753 vi = aj + ai[i]; 754 nz = ai[i+1] - ai[i]; 755 s[0] = b[i2]; s[1] = b[i2+1]; 756 for (j=0; j<nz; j++) { 757 idx = 2*vi[j]; 758 it = 4*j; 759 xw[0] = x[idx]; xw[1] = x[1+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 761 } 762 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 763 x[i2] += xw[0]; x[i2+1] += xw[1]; 764 idiag -= 4; 765 i2 -= 2; 766 } 767 break; 768 case 3: 769 for (i=m-1; i>=0; i--) { 770 v = aa + 9*ai[i]; 771 vi = aj + ai[i]; 772 nz = ai[i+1] - ai[i]; 773 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 774 while (nz--) { 775 idx = 3*(*vi++); 776 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 777 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 778 v += 9; 779 } 780 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 781 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 782 idiag -= 9; 783 i2 -= 3; 784 } 785 break; 786 case 4: 787 for (i=m-1; i>=0; i--) { 788 v = aa + 16*ai[i]; 789 vi = aj + ai[i]; 790 nz = ai[i+1] - ai[i]; 791 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 792 while (nz--) { 793 idx = 4*(*vi++); 794 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 795 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 796 v += 16; 797 } 798 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 799 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 800 idiag -= 16; 801 i2 -= 4; 802 } 803 break; 804 case 5: 805 for (i=m-1; i>=0; i--) { 806 v = aa + 25*ai[i]; 807 vi = aj + ai[i]; 808 nz = ai[i+1] - ai[i]; 809 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 810 while (nz--) { 811 idx = 5*(*vi++); 812 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 813 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 814 v += 25; 815 } 816 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 817 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 818 idiag -= 25; 819 i2 -= 5; 820 } 821 break; 822 case 6: 823 for (i=m-1; i>=0; i--) { 824 v = aa + 36*ai[i]; 825 vi = aj + ai[i]; 826 nz = ai[i+1] - ai[i]; 827 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 828 while (nz--) { 829 idx = 6*(*vi++); 830 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 831 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 833 v += 36; 834 } 835 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 837 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 838 idiag -= 36; 839 i2 -= 6; 840 } 841 break; 842 case 7: 843 for (i=m-1; i>=0; i--) { 844 v = aa + 49*ai[i]; 845 vi = aj + ai[i]; 846 nz = ai[i+1] - ai[i]; 847 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 848 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 849 while (nz--) { 850 idx = 7*(*vi++); 851 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 852 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 853 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 854 v += 49; 855 } 856 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 857 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 858 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 859 idiag -= 49; 860 i2 -= 7; 861 } 862 break; 863 default: 864 for (i=m-1; i>=0; i--) { 865 v = aa + bs2*ai[i]; 866 vi = aj + ai[i]; 867 nz = ai[i+1] - ai[i]; 868 869 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 874 workt += bs; 875 } 876 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 877 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 878 879 idiag -= bs2; 880 i2 -= bs; 881 } 882 break; 883 } 884 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 885 } 886 } 887 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 888 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 889 PetscFunctionReturn(0); 890 } 891 892 893 /* 894 Special version for direct calls from Fortran (Used in PETSc-fun3d) 895 */ 896 #if defined(PETSC_HAVE_FORTRAN_CAPS) 897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 899 #define matsetvaluesblocked4_ matsetvaluesblocked4 900 #endif 901 902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 903 { 904 Mat A = *AA; 905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 906 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 907 PetscInt *ai =a->i,*ailen=a->ilen; 908 PetscInt *aj =a->j,stepval,lastcol = -1; 909 const PetscScalar *value = v; 910 MatScalar *ap,*aa = a->a,*bap; 911 PetscErrorCode ierr; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 952 } 953 if (N >= i) { 954 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 for (k=0; k<m; k++) { /* loop over added rows */ 990 row = im[k]; brow = row/4; 991 rp = aj + ai[brow]; 992 ap = aa + 16*ai[brow]; 993 nrow = ailen[brow]; 994 low = 0; 995 high = nrow; 996 for (l=0; l<n; l++) { /* loop over added columns */ 997 col = in[l]; bcol = col/4; 998 ridx = row % 4; cidx = col % 4; 999 value = v[l + k*n]; 1000 if (col <= lastcol) low = 0; 1001 else high = nrow; 1002 lastcol = col; 1003 while (high-low > 7) { 1004 t = (low+high)/2; 1005 if (rp[t] > bcol) high = t; 1006 else low = t; 1007 } 1008 for (i=low; i<high; i++) { 1009 if (rp[i] > bcol) break; 1010 if (rp[i] == bcol) { 1011 bap = ap + 16*i + 4*cidx + ridx; 1012 *bap += value; 1013 goto noinsert1; 1014 } 1015 } 1016 N = nrow++ - 1; 1017 high++; /* added new column thus must search to one higher than before */ 1018 /* shift up all the later entries in this row */ 1019 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1020 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1021 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1022 rp[i] = bcol; 1023 ap[16*i + 4*cidx + ridx] = value; 1024 noinsert1:; 1025 low = i; 1026 } 1027 ailen[brow] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 /* 1033 Checks for missing diagonals 1034 */ 1035 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1036 { 1037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1038 PetscErrorCode ierr; 1039 PetscInt *diag,*ii = a->i,i; 1040 1041 PetscFunctionBegin; 1042 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1043 *missing = PETSC_FALSE; 1044 if (A->rmap->n > 0 && !ii) { 1045 *missing = PETSC_TRUE; 1046 if (d) *d = 0; 1047 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1048 } else { 1049 PetscInt n; 1050 n = PetscMin(a->mbs, a->nbs); 1051 diag = a->diag; 1052 for (i=0; i<n; i++) { 1053 if (diag[i] >= ii[i+1]) { 1054 *missing = PETSC_TRUE; 1055 if (d) *d = i; 1056 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1057 break; 1058 } 1059 } 1060 } 1061 PetscFunctionReturn(0); 1062 } 1063 1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1065 { 1066 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1067 PetscErrorCode ierr; 1068 PetscInt i,j,m = a->mbs; 1069 1070 PetscFunctionBegin; 1071 if (!a->diag) { 1072 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1073 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1074 a->free_diag = PETSC_TRUE; 1075 } 1076 for (i=0; i<m; i++) { 1077 a->diag[i] = a->i[i+1]; 1078 for (j=a->i[i]; j<a->i[i+1]; j++) { 1079 if (a->j[j] == i) { 1080 a->diag[i] = j; 1081 break; 1082 } 1083 } 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1090 { 1091 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1092 PetscErrorCode ierr; 1093 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1094 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1095 1096 PetscFunctionBegin; 1097 *nn = n; 1098 if (!ia) PetscFunctionReturn(0); 1099 if (symmetric) { 1100 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1101 nz = tia[n]; 1102 } else { 1103 tia = a->i; tja = a->j; 1104 } 1105 1106 if (!blockcompressed && bs > 1) { 1107 (*nn) *= bs; 1108 /* malloc & create the natural set of indices */ 1109 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1110 if (n) { 1111 (*ia)[0] = oshift; 1112 for (j=1; j<bs; j++) { 1113 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1114 } 1115 } 1116 1117 for (i=1; i<n; i++) { 1118 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1119 for (j=1; j<bs; j++) { 1120 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1121 } 1122 } 1123 if (n) { 1124 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1125 } 1126 1127 if (inja) { 1128 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1129 cnt = 0; 1130 for (i=0; i<n; i++) { 1131 for (j=0; j<bs; j++) { 1132 for (k=tia[i]; k<tia[i+1]; k++) { 1133 for (l=0; l<bs; l++) { 1134 (*ja)[cnt++] = bs*tja[k] + l; 1135 } 1136 } 1137 } 1138 } 1139 } 1140 1141 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1142 ierr = PetscFree(tia);CHKERRQ(ierr); 1143 ierr = PetscFree(tja);CHKERRQ(ierr); 1144 } 1145 } else if (oshift == 1) { 1146 if (symmetric) { 1147 nz = tia[A->rmap->n/bs]; 1148 /* add 1 to i and j indices */ 1149 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1150 *ia = tia; 1151 if (ja) { 1152 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1153 *ja = tja; 1154 } 1155 } else { 1156 nz = a->i[A->rmap->n/bs]; 1157 /* malloc space and add 1 to i and j indices */ 1158 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1159 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1160 if (ja) { 1161 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1162 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1163 } 1164 } 1165 } else { 1166 *ia = tia; 1167 if (ja) *ja = tja; 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1173 { 1174 PetscErrorCode ierr; 1175 1176 PetscFunctionBegin; 1177 if (!ia) PetscFunctionReturn(0); 1178 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1179 ierr = PetscFree(*ia);CHKERRQ(ierr); 1180 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1181 } 1182 PetscFunctionReturn(0); 1183 } 1184 1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1186 { 1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1188 PetscErrorCode ierr; 1189 1190 PetscFunctionBegin; 1191 #if defined(PETSC_USE_LOG) 1192 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1193 #endif 1194 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1197 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1198 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1199 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1200 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1204 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1205 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1206 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1207 1208 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1209 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1210 ierr = PetscFree(A->data);CHKERRQ(ierr); 1211 1212 ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1225 #if defined(PETSC_HAVE_HYPRE) 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1227 #endif 1228 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1229 PetscFunctionReturn(0); 1230 } 1231 1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1233 { 1234 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1235 PetscErrorCode ierr; 1236 1237 PetscFunctionBegin; 1238 switch (op) { 1239 case MAT_ROW_ORIENTED: 1240 a->roworiented = flg; 1241 break; 1242 case MAT_KEEP_NONZERO_PATTERN: 1243 a->keepnonzeropattern = flg; 1244 break; 1245 case MAT_NEW_NONZERO_LOCATIONS: 1246 a->nonew = (flg ? 0 : 1); 1247 break; 1248 case MAT_NEW_NONZERO_LOCATION_ERR: 1249 a->nonew = (flg ? -1 : 0); 1250 break; 1251 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1252 a->nonew = (flg ? -2 : 0); 1253 break; 1254 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1255 a->nounused = (flg ? -1 : 0); 1256 break; 1257 case MAT_FORCE_DIAGONAL_ENTRIES: 1258 case MAT_IGNORE_OFF_PROC_ENTRIES: 1259 case MAT_USE_HASH_TABLE: 1260 case MAT_SORTED_FULL: 1261 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1262 break; 1263 case MAT_SPD: 1264 case MAT_SYMMETRIC: 1265 case MAT_STRUCTURALLY_SYMMETRIC: 1266 case MAT_HERMITIAN: 1267 case MAT_SYMMETRY_ETERNAL: 1268 case MAT_SUBMAT_SINGLEIS: 1269 case MAT_STRUCTURE_ONLY: 1270 /* These options are handled directly by MatSetOption() */ 1271 break; 1272 default: 1273 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1274 } 1275 PetscFunctionReturn(0); 1276 } 1277 1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1280 { 1281 PetscErrorCode ierr; 1282 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1283 MatScalar *aa_i; 1284 PetscScalar *v_i; 1285 1286 PetscFunctionBegin; 1287 bs = A->rmap->bs; 1288 bs2 = bs*bs; 1289 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1290 1291 bn = row/bs; /* Block number */ 1292 bp = row % bs; /* Block Position */ 1293 M = ai[bn+1] - ai[bn]; 1294 *nz = bs*M; 1295 1296 if (v) { 1297 *v = NULL; 1298 if (*nz) { 1299 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1300 for (i=0; i<M; i++) { /* for each block in the block row */ 1301 v_i = *v + i*bs; 1302 aa_i = aa + bs2*(ai[bn] + i); 1303 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1304 } 1305 } 1306 } 1307 1308 if (idx) { 1309 *idx = NULL; 1310 if (*nz) { 1311 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1312 for (i=0; i<M; i++) { /* for each block in the block row */ 1313 idx_i = *idx + i*bs; 1314 itmp = bs*aj[ai[bn] + i]; 1315 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1316 } 1317 } 1318 } 1319 PetscFunctionReturn(0); 1320 } 1321 1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1323 { 1324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1325 PetscErrorCode ierr; 1326 1327 PetscFunctionBegin; 1328 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1329 PetscFunctionReturn(0); 1330 } 1331 1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333 { 1334 PetscErrorCode ierr; 1335 1336 PetscFunctionBegin; 1337 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1338 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1339 PetscFunctionReturn(0); 1340 } 1341 1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1343 { 1344 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1345 Mat C; 1346 PetscErrorCode ierr; 1347 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1348 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1349 MatScalar *ata,*aa=a->a; 1350 1351 PetscFunctionBegin; 1352 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1353 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1354 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1355 1356 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1357 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1358 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1359 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1360 1361 at = (Mat_SeqBAIJ*)C->data; 1362 ati = at->i; 1363 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1364 } else { 1365 C = *B; 1366 at = (Mat_SeqBAIJ*)C->data; 1367 ati = at->i; 1368 } 1369 1370 atj = at->j; 1371 ata = at->a; 1372 1373 /* Copy ati into atfill so we have locations of the next free space in atj */ 1374 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1375 1376 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1377 for (i=0; i<mbs; i++) { 1378 anzj = ai[i+1] - ai[i]; 1379 for (j=0; j<anzj; j++) { 1380 atj[atfill[*aj]] = i; 1381 for (kr=0; kr<bs; kr++) { 1382 for (k=0; k<bs; k++) { 1383 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1384 } 1385 } 1386 atfill[*aj++] += 1; 1387 } 1388 } 1389 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1390 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 1392 /* Clean up temporary space and complete requests. */ 1393 ierr = PetscFree(atfill);CHKERRQ(ierr); 1394 1395 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1396 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1397 *B = C; 1398 } else { 1399 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1400 } 1401 PetscFunctionReturn(0); 1402 } 1403 1404 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1405 { 1406 PetscErrorCode ierr; 1407 Mat Btrans; 1408 1409 PetscFunctionBegin; 1410 *f = PETSC_FALSE; 1411 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1412 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1413 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1414 PetscFunctionReturn(0); 1415 } 1416 1417 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1418 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1419 { 1420 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1421 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1422 PetscInt *rowlens,*colidxs; 1423 PetscScalar *matvals; 1424 PetscErrorCode ierr; 1425 1426 PetscFunctionBegin; 1427 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1428 1429 M = mat->rmap->N; 1430 N = mat->cmap->N; 1431 m = mat->rmap->n; 1432 bs = mat->rmap->bs; 1433 nz = bs*bs*A->nz; 1434 1435 /* write matrix header */ 1436 header[0] = MAT_FILE_CLASSID; 1437 header[1] = M; header[2] = N; header[3] = nz; 1438 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1439 1440 /* store row lengths */ 1441 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1442 for (cnt=0, i=0; i<A->mbs; i++) 1443 for (j=0; j<bs; j++) 1444 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1445 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1446 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1447 1448 /* store column indices */ 1449 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1450 for (cnt=0, i=0; i<A->mbs; i++) 1451 for (k=0; k<bs; k++) 1452 for (j=A->i[i]; j<A->i[i+1]; j++) 1453 for (l=0; l<bs; l++) 1454 colidxs[cnt++] = bs*A->j[j] + l; 1455 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1456 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1457 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1458 1459 /* store nonzero values */ 1460 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1461 for (cnt=0, i=0; i<A->mbs; i++) 1462 for (k=0; k<bs; k++) 1463 for (j=A->i[i]; j<A->i[i+1]; j++) 1464 for (l=0; l<bs; l++) 1465 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1466 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1467 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1468 ierr = PetscFree(matvals);CHKERRQ(ierr); 1469 1470 /* write block size option to the viewer's .info file */ 1471 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } 1474 1475 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1476 { 1477 PetscErrorCode ierr; 1478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1479 PetscInt i,bs = A->rmap->bs,k; 1480 1481 PetscFunctionBegin; 1482 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1483 for (i=0; i<a->mbs; i++) { 1484 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1485 for (k=a->i[i]; k<a->i[i+1]; k++) { 1486 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1487 } 1488 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1489 } 1490 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1491 PetscFunctionReturn(0); 1492 } 1493 1494 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1495 { 1496 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1497 PetscErrorCode ierr; 1498 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1499 PetscViewerFormat format; 1500 1501 PetscFunctionBegin; 1502 if (A->structure_only) { 1503 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1504 PetscFunctionReturn(0); 1505 } 1506 1507 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1508 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1509 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1510 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1511 const char *matname; 1512 Mat aij; 1513 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1514 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1515 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1516 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1517 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1518 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1519 PetscFunctionReturn(0); 1520 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1521 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1522 for (i=0; i<a->mbs; i++) { 1523 for (j=0; j<bs; j++) { 1524 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1525 for (k=a->i[i]; k<a->i[i+1]; k++) { 1526 for (l=0; l<bs; l++) { 1527 #if defined(PETSC_USE_COMPLEX) 1528 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1529 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1530 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1531 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1532 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1533 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1534 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1535 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } 1537 #else 1538 if (a->a[bs2*k + l*bs + j] != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1540 } 1541 #endif 1542 } 1543 } 1544 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1545 } 1546 } 1547 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1548 } else { 1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1550 for (i=0; i<a->mbs; i++) { 1551 for (j=0; j<bs; j++) { 1552 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1553 for (k=a->i[i]; k<a->i[i+1]; k++) { 1554 for (l=0; l<bs; l++) { 1555 #if defined(PETSC_USE_COMPLEX) 1556 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1557 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1558 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1559 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1560 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1561 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1562 } else { 1563 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } 1565 #else 1566 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1567 #endif 1568 } 1569 } 1570 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1571 } 1572 } 1573 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1574 } 1575 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 1579 #include <petscdraw.h> 1580 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1581 { 1582 Mat A = (Mat) Aa; 1583 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1584 PetscErrorCode ierr; 1585 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1586 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1587 MatScalar *aa; 1588 PetscViewer viewer; 1589 PetscViewerFormat format; 1590 1591 PetscFunctionBegin; 1592 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1593 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1594 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1595 1596 /* loop over matrix elements drawing boxes */ 1597 1598 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1599 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1600 /* Blue for negative, Cyan for zero and Red for positive */ 1601 color = PETSC_DRAW_BLUE; 1602 for (i=0,row=0; i<mbs; i++,row+=bs) { 1603 for (j=a->i[i]; j<a->i[i+1]; j++) { 1604 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1605 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1606 aa = a->a + j*bs2; 1607 for (k=0; k<bs; k++) { 1608 for (l=0; l<bs; l++) { 1609 if (PetscRealPart(*aa++) >= 0.) continue; 1610 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1611 } 1612 } 1613 } 1614 } 1615 color = PETSC_DRAW_CYAN; 1616 for (i=0,row=0; i<mbs; i++,row+=bs) { 1617 for (j=a->i[i]; j<a->i[i+1]; j++) { 1618 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1619 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1620 aa = a->a + j*bs2; 1621 for (k=0; k<bs; k++) { 1622 for (l=0; l<bs; l++) { 1623 if (PetscRealPart(*aa++) != 0.) continue; 1624 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1625 } 1626 } 1627 } 1628 } 1629 color = PETSC_DRAW_RED; 1630 for (i=0,row=0; i<mbs; i++,row+=bs) { 1631 for (j=a->i[i]; j<a->i[i+1]; j++) { 1632 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1633 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1634 aa = a->a + j*bs2; 1635 for (k=0; k<bs; k++) { 1636 for (l=0; l<bs; l++) { 1637 if (PetscRealPart(*aa++) <= 0.) continue; 1638 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1639 } 1640 } 1641 } 1642 } 1643 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1644 } else { 1645 /* use contour shading to indicate magnitude of values */ 1646 /* first determine max of all nonzero values */ 1647 PetscReal minv = 0.0, maxv = 0.0; 1648 PetscDraw popup; 1649 1650 for (i=0; i<a->nz*a->bs2; i++) { 1651 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1652 } 1653 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1654 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1655 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1656 1657 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1658 for (i=0,row=0; i<mbs; i++,row+=bs) { 1659 for (j=a->i[i]; j<a->i[i+1]; j++) { 1660 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1661 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1662 aa = a->a + j*bs2; 1663 for (k=0; k<bs; k++) { 1664 for (l=0; l<bs; l++) { 1665 MatScalar v = *aa++; 1666 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1667 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1668 } 1669 } 1670 } 1671 } 1672 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1673 } 1674 PetscFunctionReturn(0); 1675 } 1676 1677 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1678 { 1679 PetscErrorCode ierr; 1680 PetscReal xl,yl,xr,yr,w,h; 1681 PetscDraw draw; 1682 PetscBool isnull; 1683 1684 PetscFunctionBegin; 1685 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1686 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1687 if (isnull) PetscFunctionReturn(0); 1688 1689 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1690 xr += w; yr += h; xl = -w; yl = -h; 1691 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1692 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1693 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1694 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1695 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1696 PetscFunctionReturn(0); 1697 } 1698 1699 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1700 { 1701 PetscErrorCode ierr; 1702 PetscBool iascii,isbinary,isdraw; 1703 1704 PetscFunctionBegin; 1705 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1708 if (iascii) { 1709 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1710 } else if (isbinary) { 1711 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1712 } else if (isdraw) { 1713 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1714 } else { 1715 Mat B; 1716 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1717 ierr = MatView(B,viewer);CHKERRQ(ierr); 1718 ierr = MatDestroy(&B);CHKERRQ(ierr); 1719 } 1720 PetscFunctionReturn(0); 1721 } 1722 1723 1724 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1725 { 1726 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1727 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1728 PetscInt *ai = a->i,*ailen = a->ilen; 1729 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1730 MatScalar *ap,*aa = a->a; 1731 1732 PetscFunctionBegin; 1733 for (k=0; k<m; k++) { /* loop over rows */ 1734 row = im[k]; brow = row/bs; 1735 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1736 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1737 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1738 nrow = ailen[brow]; 1739 for (l=0; l<n; l++) { /* loop over columns */ 1740 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1741 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1742 col = in[l]; 1743 bcol = col/bs; 1744 cidx = col%bs; 1745 ridx = row%bs; 1746 high = nrow; 1747 low = 0; /* assume unsorted */ 1748 while (high-low > 5) { 1749 t = (low+high)/2; 1750 if (rp[t] > bcol) high = t; 1751 else low = t; 1752 } 1753 for (i=low; i<high; i++) { 1754 if (rp[i] > bcol) break; 1755 if (rp[i] == bcol) { 1756 *v++ = ap[bs2*i+bs*cidx+ridx]; 1757 goto finished; 1758 } 1759 } 1760 *v++ = 0.0; 1761 finished:; 1762 } 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 PetscErrorCode MatPermute_SeqBAIJ(Mat A,IS rowp,IS colp,Mat *B) 1768 { 1769 PetscErrorCode ierr; 1770 1771 PetscFunctionBegin; 1772 ierr = MatCreateSubMatrix(A,rowp,colp,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1777 { 1778 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1779 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1780 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1781 PetscErrorCode ierr; 1782 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1783 PetscBool roworiented=a->roworiented; 1784 const PetscScalar *value = v; 1785 MatScalar *ap=NULL,*aa = a->a,*bap; 1786 1787 PetscFunctionBegin; 1788 if (roworiented) { 1789 stepval = (n-1)*bs; 1790 } else { 1791 stepval = (m-1)*bs; 1792 } 1793 for (k=0; k<m; k++) { /* loop over added rows */ 1794 row = im[k]; 1795 if (row < 0) continue; 1796 if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1797 rp = aj + ai[row]; 1798 if (!A->structure_only) ap = aa + bs2*ai[row]; 1799 rmax = imax[row]; 1800 nrow = ailen[row]; 1801 low = 0; 1802 high = nrow; 1803 for (l=0; l<n; l++) { /* loop over added columns */ 1804 if (in[l] < 0) continue; 1805 if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1806 col = in[l]; 1807 if (!A->structure_only) { 1808 if (roworiented) { 1809 value = v + (k*(stepval+bs) + l)*bs; 1810 } else { 1811 value = v + (l*(stepval+bs) + k)*bs; 1812 } 1813 } 1814 if (col <= lastcol) low = 0; 1815 else high = nrow; 1816 lastcol = col; 1817 while (high-low > 7) { 1818 t = (low+high)/2; 1819 if (rp[t] > col) high = t; 1820 else low = t; 1821 } 1822 for (i=low; i<high; i++) { 1823 if (rp[i] > col) break; 1824 if (rp[i] == col) { 1825 if (A->structure_only) goto noinsert2; 1826 bap = ap + bs2*i; 1827 if (roworiented) { 1828 if (is == ADD_VALUES) { 1829 for (ii=0; ii<bs; ii++,value+=stepval) { 1830 for (jj=ii; jj<bs2; jj+=bs) { 1831 bap[jj] += *value++; 1832 } 1833 } 1834 } else { 1835 for (ii=0; ii<bs; ii++,value+=stepval) { 1836 for (jj=ii; jj<bs2; jj+=bs) { 1837 bap[jj] = *value++; 1838 } 1839 } 1840 } 1841 } else { 1842 if (is == ADD_VALUES) { 1843 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1844 for (jj=0; jj<bs; jj++) { 1845 bap[jj] += value[jj]; 1846 } 1847 bap += bs; 1848 } 1849 } else { 1850 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1851 for (jj=0; jj<bs; jj++) { 1852 bap[jj] = value[jj]; 1853 } 1854 bap += bs; 1855 } 1856 } 1857 } 1858 goto noinsert2; 1859 } 1860 } 1861 if (nonew == 1) goto noinsert2; 1862 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1863 if (A->structure_only) { 1864 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1865 } else { 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 } 1868 N = nrow++ - 1; high++; 1869 /* shift up all the later entries in this row */ 1870 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1871 rp[i] = col; 1872 if (!A->structure_only) { 1873 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1874 bap = ap + bs2*i; 1875 if (roworiented) { 1876 for (ii=0; ii<bs; ii++,value+=stepval) { 1877 for (jj=ii; jj<bs2; jj+=bs) { 1878 bap[jj] = *value++; 1879 } 1880 } 1881 } else { 1882 for (ii=0; ii<bs; ii++,value+=stepval) { 1883 for (jj=0; jj<bs; jj++) { 1884 *bap++ = *value++; 1885 } 1886 } 1887 } 1888 } 1889 noinsert2:; 1890 low = i; 1891 } 1892 ailen[row] = nrow; 1893 } 1894 PetscFunctionReturn(0); 1895 } 1896 1897 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1898 { 1899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1900 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1901 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1902 PetscErrorCode ierr; 1903 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1904 MatScalar *aa = a->a,*ap; 1905 PetscReal ratio=0.6; 1906 1907 PetscFunctionBegin; 1908 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1909 1910 if (m) rmax = ailen[0]; 1911 for (i=1; i<mbs; i++) { 1912 /* move each row back by the amount of empty slots (fshift) before it*/ 1913 fshift += imax[i-1] - ailen[i-1]; 1914 rmax = PetscMax(rmax,ailen[i]); 1915 if (fshift) { 1916 ip = aj + ai[i]; 1917 ap = aa + bs2*ai[i]; 1918 N = ailen[i]; 1919 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1920 if (!A->structure_only) { 1921 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1922 } 1923 } 1924 ai[i] = ai[i-1] + ailen[i-1]; 1925 } 1926 if (mbs) { 1927 fshift += imax[mbs-1] - ailen[mbs-1]; 1928 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1929 } 1930 1931 /* reset ilen and imax for each row */ 1932 a->nonzerorowcnt = 0; 1933 if (A->structure_only) { 1934 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1935 } else { /* !A->structure_only */ 1936 for (i=0; i<mbs; i++) { 1937 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1938 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1939 } 1940 } 1941 a->nz = ai[mbs]; 1942 1943 /* diagonals may have moved, so kill the diagonal pointers */ 1944 a->idiagvalid = PETSC_FALSE; 1945 if (fshift && a->diag) { 1946 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1947 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1948 a->diag = NULL; 1949 } 1950 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1951 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1953 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1954 1955 A->info.mallocs += a->reallocs; 1956 a->reallocs = 0; 1957 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1958 a->rmax = rmax; 1959 1960 if (!A->structure_only) { 1961 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1962 } 1963 PetscFunctionReturn(0); 1964 } 1965 1966 /* 1967 This function returns an array of flags which indicate the locations of contiguous 1968 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1969 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1970 Assume: sizes should be long enough to hold all the values. 1971 */ 1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1973 { 1974 PetscInt i,j,k,row; 1975 PetscBool flg; 1976 1977 PetscFunctionBegin; 1978 for (i=0,j=0; i<n; j++) { 1979 row = idx[i]; 1980 if (row%bs!=0) { /* Not the begining of a block */ 1981 sizes[j] = 1; 1982 i++; 1983 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1984 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1985 i++; 1986 } else { /* Begining of the block, so check if the complete block exists */ 1987 flg = PETSC_TRUE; 1988 for (k=1; k<bs; k++) { 1989 if (row+k != idx[i+k]) { /* break in the block */ 1990 flg = PETSC_FALSE; 1991 break; 1992 } 1993 } 1994 if (flg) { /* No break in the bs */ 1995 sizes[j] = bs; 1996 i += bs; 1997 } else { 1998 sizes[j] = 1; 1999 i++; 2000 } 2001 } 2002 } 2003 *bs_max = j; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2008 { 2009 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2010 PetscErrorCode ierr; 2011 PetscInt i,j,k,count,*rows; 2012 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2013 PetscScalar zero = 0.0; 2014 MatScalar *aa; 2015 const PetscScalar *xx; 2016 PetscScalar *bb; 2017 2018 PetscFunctionBegin; 2019 /* fix right hand side if needed */ 2020 if (x && b) { 2021 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2022 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2023 for (i=0; i<is_n; i++) { 2024 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2025 } 2026 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2027 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2028 } 2029 2030 /* Make a copy of the IS and sort it */ 2031 /* allocate memory for rows,sizes */ 2032 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2033 2034 /* copy IS values to rows, and sort them */ 2035 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2036 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2037 2038 if (baij->keepnonzeropattern) { 2039 for (i=0; i<is_n; i++) sizes[i] = 1; 2040 bs_max = is_n; 2041 } else { 2042 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2043 A->nonzerostate++; 2044 } 2045 2046 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2047 row = rows[j]; 2048 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2049 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2050 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2051 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2052 if (diag != (PetscScalar)0.0) { 2053 if (baij->ilen[row/bs] > 0) { 2054 baij->ilen[row/bs] = 1; 2055 baij->j[baij->i[row/bs]] = row/bs; 2056 2057 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2058 } 2059 /* Now insert all the diagonal values for this bs */ 2060 for (k=0; k<bs; k++) { 2061 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2062 } 2063 } else { /* (diag == 0.0) */ 2064 baij->ilen[row/bs] = 0; 2065 } /* end (diag == 0.0) */ 2066 } else { /* (sizes[i] != bs) */ 2067 if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2068 for (k=0; k<count; k++) { 2069 aa[0] = zero; 2070 aa += bs; 2071 } 2072 if (diag != (PetscScalar)0.0) { 2073 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2074 } 2075 } 2076 } 2077 2078 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2079 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2080 PetscFunctionReturn(0); 2081 } 2082 2083 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2084 { 2085 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2086 PetscErrorCode ierr; 2087 PetscInt i,j,k,count; 2088 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2089 PetscScalar zero = 0.0; 2090 MatScalar *aa; 2091 const PetscScalar *xx; 2092 PetscScalar *bb; 2093 PetscBool *zeroed,vecs = PETSC_FALSE; 2094 2095 PetscFunctionBegin; 2096 /* fix right hand side if needed */ 2097 if (x && b) { 2098 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2099 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2100 vecs = PETSC_TRUE; 2101 } 2102 2103 /* zero the columns */ 2104 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2105 for (i=0; i<is_n; i++) { 2106 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2107 zeroed[is_idx[i]] = PETSC_TRUE; 2108 } 2109 for (i=0; i<A->rmap->N; i++) { 2110 if (!zeroed[i]) { 2111 row = i/bs; 2112 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2113 for (k=0; k<bs; k++) { 2114 col = bs*baij->j[j] + k; 2115 if (zeroed[col]) { 2116 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2117 if (vecs) bb[i] -= aa[0]*xx[col]; 2118 aa[0] = 0.0; 2119 } 2120 } 2121 } 2122 } else if (vecs) bb[i] = diag*xx[i]; 2123 } 2124 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2125 if (vecs) { 2126 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2127 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2128 } 2129 2130 /* zero the rows */ 2131 for (i=0; i<is_n; i++) { 2132 row = is_idx[i]; 2133 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2134 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2135 for (k=0; k<count; k++) { 2136 aa[0] = zero; 2137 aa += bs; 2138 } 2139 if (diag != (PetscScalar)0.0) { 2140 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2141 } 2142 } 2143 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2148 { 2149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2150 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2151 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2152 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2153 PetscErrorCode ierr; 2154 PetscInt ridx,cidx,bs2=a->bs2; 2155 PetscBool roworiented=a->roworiented; 2156 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2157 2158 PetscFunctionBegin; 2159 for (k=0; k<m; k++) { /* loop over added rows */ 2160 row = im[k]; 2161 brow = row/bs; 2162 if (row < 0) continue; 2163 if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2164 rp = aj + ai[brow]; 2165 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2166 rmax = imax[brow]; 2167 nrow = ailen[brow]; 2168 low = 0; 2169 high = nrow; 2170 for (l=0; l<n; l++) { /* loop over added columns */ 2171 if (in[l] < 0) continue; 2172 if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2173 col = in[l]; bcol = col/bs; 2174 ridx = row % bs; cidx = col % bs; 2175 if (!A->structure_only) { 2176 if (roworiented) { 2177 value = v[l + k*n]; 2178 } else { 2179 value = v[k + l*m]; 2180 } 2181 } 2182 if (col <= lastcol) low = 0; else high = nrow; 2183 lastcol = col; 2184 while (high-low > 7) { 2185 t = (low+high)/2; 2186 if (rp[t] > bcol) high = t; 2187 else low = t; 2188 } 2189 for (i=low; i<high; i++) { 2190 if (rp[i] > bcol) break; 2191 if (rp[i] == bcol) { 2192 bap = ap + bs2*i + bs*cidx + ridx; 2193 if (!A->structure_only) { 2194 if (is == ADD_VALUES) *bap += value; 2195 else *bap = value; 2196 } 2197 goto noinsert1; 2198 } 2199 } 2200 if (nonew == 1) goto noinsert1; 2201 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2202 if (A->structure_only) { 2203 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2204 } else { 2205 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2206 } 2207 N = nrow++ - 1; high++; 2208 /* shift up all the later entries in this row */ 2209 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2210 rp[i] = bcol; 2211 if (!A->structure_only) { 2212 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2213 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2214 ap[bs2*i + bs*cidx + ridx] = value; 2215 } 2216 a->nz++; 2217 A->nonzerostate++; 2218 noinsert1:; 2219 low = i; 2220 } 2221 ailen[brow] = nrow; 2222 } 2223 PetscFunctionReturn(0); 2224 } 2225 2226 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2227 { 2228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2229 Mat outA; 2230 PetscErrorCode ierr; 2231 PetscBool row_identity,col_identity; 2232 2233 PetscFunctionBegin; 2234 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2235 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2236 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2237 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2238 2239 outA = inA; 2240 inA->factortype = MAT_FACTOR_LU; 2241 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2242 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2243 2244 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2245 2246 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2247 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2248 a->row = row; 2249 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2250 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2251 a->col = col; 2252 2253 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2254 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2255 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2256 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2257 2258 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2259 if (!a->solve_work) { 2260 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2261 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2262 } 2263 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2264 PetscFunctionReturn(0); 2265 } 2266 2267 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2268 { 2269 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2270 PetscInt i,nz,mbs; 2271 2272 PetscFunctionBegin; 2273 nz = baij->maxnz; 2274 mbs = baij->mbs; 2275 for (i=0; i<nz; i++) { 2276 baij->j[i] = indices[i]; 2277 } 2278 baij->nz = nz; 2279 for (i=0; i<mbs; i++) { 2280 baij->ilen[i] = baij->imax[i]; 2281 } 2282 PetscFunctionReturn(0); 2283 } 2284 2285 /*@ 2286 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2287 in the matrix. 2288 2289 Input Parameters: 2290 + mat - the SeqBAIJ matrix 2291 - indices - the column indices 2292 2293 Level: advanced 2294 2295 Notes: 2296 This can be called if you have precomputed the nonzero structure of the 2297 matrix and want to provide it to the matrix object to improve the performance 2298 of the MatSetValues() operation. 2299 2300 You MUST have set the correct numbers of nonzeros per row in the call to 2301 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2302 2303 MUST be called before any calls to MatSetValues(); 2304 2305 @*/ 2306 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2307 { 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2312 PetscValidPointer(indices,2); 2313 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2318 { 2319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2320 PetscErrorCode ierr; 2321 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2322 PetscReal atmp; 2323 PetscScalar *x,zero = 0.0; 2324 MatScalar *aa; 2325 PetscInt ncols,brow,krow,kcol; 2326 2327 PetscFunctionBegin; 2328 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2329 bs = A->rmap->bs; 2330 aa = a->a; 2331 ai = a->i; 2332 aj = a->j; 2333 mbs = a->mbs; 2334 2335 ierr = VecSet(v,zero);CHKERRQ(ierr); 2336 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2337 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2338 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2339 for (i=0; i<mbs; i++) { 2340 ncols = ai[1] - ai[0]; ai++; 2341 brow = bs*i; 2342 for (j=0; j<ncols; j++) { 2343 for (kcol=0; kcol<bs; kcol++) { 2344 for (krow=0; krow<bs; krow++) { 2345 atmp = PetscAbsScalar(*aa);aa++; 2346 row = brow + krow; /* row index */ 2347 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2348 } 2349 } 2350 aj++; 2351 } 2352 } 2353 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2354 PetscFunctionReturn(0); 2355 } 2356 2357 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2358 { 2359 PetscErrorCode ierr; 2360 2361 PetscFunctionBegin; 2362 /* If the two matrices have the same copy implementation, use fast copy. */ 2363 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2364 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2365 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2366 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2367 2368 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2369 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2370 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2371 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2372 } else { 2373 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2374 } 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2379 { 2380 PetscErrorCode ierr; 2381 2382 PetscFunctionBegin; 2383 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2388 { 2389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2390 2391 PetscFunctionBegin; 2392 *array = a->a; 2393 PetscFunctionReturn(0); 2394 } 2395 2396 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2397 { 2398 PetscFunctionBegin; 2399 *array = NULL; 2400 PetscFunctionReturn(0); 2401 } 2402 2403 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2404 { 2405 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2406 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2407 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 /* Set the number of nonzeros in the new matrix */ 2412 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2417 { 2418 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2419 PetscErrorCode ierr; 2420 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2421 PetscBLASInt one=1; 2422 2423 PetscFunctionBegin; 2424 if (str == SAME_NONZERO_PATTERN) { 2425 PetscScalar alpha = a; 2426 PetscBLASInt bnz; 2427 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2428 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2429 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2430 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2431 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2432 } else { 2433 Mat B; 2434 PetscInt *nnz; 2435 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2436 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2437 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2438 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2439 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2440 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2441 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2442 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2443 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2444 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2445 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2446 ierr = PetscFree(nnz);CHKERRQ(ierr); 2447 } 2448 PetscFunctionReturn(0); 2449 } 2450 2451 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2452 { 2453 #if defined(PETSC_USE_COMPLEX) 2454 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2455 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2456 MatScalar *aa = a->a; 2457 2458 PetscFunctionBegin; 2459 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2460 #else 2461 PetscFunctionBegin; 2462 #endif 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2467 { 2468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2469 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470 MatScalar *aa = a->a; 2471 2472 PetscFunctionBegin; 2473 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2478 { 2479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2480 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2481 MatScalar *aa = a->a; 2482 2483 PetscFunctionBegin; 2484 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 /* 2489 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2490 */ 2491 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2492 { 2493 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2494 PetscErrorCode ierr; 2495 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2496 PetscInt nz = a->i[m],row,*jj,mr,col; 2497 2498 PetscFunctionBegin; 2499 *nn = n; 2500 if (!ia) PetscFunctionReturn(0); 2501 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2502 else { 2503 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2504 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2505 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2506 jj = a->j; 2507 for (i=0; i<nz; i++) { 2508 collengths[jj[i]]++; 2509 } 2510 cia[0] = oshift; 2511 for (i=0; i<n; i++) { 2512 cia[i+1] = cia[i] + collengths[i]; 2513 } 2514 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2515 jj = a->j; 2516 for (row=0; row<m; row++) { 2517 mr = a->i[row+1] - a->i[row]; 2518 for (i=0; i<mr; i++) { 2519 col = *jj++; 2520 2521 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2522 } 2523 } 2524 ierr = PetscFree(collengths);CHKERRQ(ierr); 2525 *ia = cia; *ja = cja; 2526 } 2527 PetscFunctionReturn(0); 2528 } 2529 2530 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2531 { 2532 PetscErrorCode ierr; 2533 2534 PetscFunctionBegin; 2535 if (!ia) PetscFunctionReturn(0); 2536 ierr = PetscFree(*ia);CHKERRQ(ierr); 2537 ierr = PetscFree(*ja);CHKERRQ(ierr); 2538 PetscFunctionReturn(0); 2539 } 2540 2541 /* 2542 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2543 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2544 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2545 */ 2546 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2547 { 2548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2549 PetscErrorCode ierr; 2550 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2551 PetscInt nz = a->i[m],row,*jj,mr,col; 2552 PetscInt *cspidx; 2553 2554 PetscFunctionBegin; 2555 *nn = n; 2556 if (!ia) PetscFunctionReturn(0); 2557 2558 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2560 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2561 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2562 jj = a->j; 2563 for (i=0; i<nz; i++) { 2564 collengths[jj[i]]++; 2565 } 2566 cia[0] = oshift; 2567 for (i=0; i<n; i++) { 2568 cia[i+1] = cia[i] + collengths[i]; 2569 } 2570 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2571 jj = a->j; 2572 for (row=0; row<m; row++) { 2573 mr = a->i[row+1] - a->i[row]; 2574 for (i=0; i<mr; i++) { 2575 col = *jj++; 2576 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2577 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2578 } 2579 } 2580 ierr = PetscFree(collengths);CHKERRQ(ierr); 2581 *ia = cia; 2582 *ja = cja; 2583 *spidx = cspidx; 2584 PetscFunctionReturn(0); 2585 } 2586 2587 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2588 { 2589 PetscErrorCode ierr; 2590 2591 PetscFunctionBegin; 2592 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2593 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2598 { 2599 PetscErrorCode ierr; 2600 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2601 2602 PetscFunctionBegin; 2603 if (!Y->preallocated || !aij->nz) { 2604 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2605 } 2606 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2607 PetscFunctionReturn(0); 2608 } 2609 2610 /* -------------------------------------------------------------------*/ 2611 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2612 MatGetRow_SeqBAIJ, 2613 MatRestoreRow_SeqBAIJ, 2614 MatMult_SeqBAIJ_N, 2615 /* 4*/ MatMultAdd_SeqBAIJ_N, 2616 MatMultTranspose_SeqBAIJ, 2617 MatMultTransposeAdd_SeqBAIJ, 2618 NULL, 2619 NULL, 2620 NULL, 2621 /* 10*/ NULL, 2622 MatLUFactor_SeqBAIJ, 2623 NULL, 2624 NULL, 2625 MatTranspose_SeqBAIJ, 2626 /* 15*/ MatGetInfo_SeqBAIJ, 2627 MatEqual_SeqBAIJ, 2628 MatGetDiagonal_SeqBAIJ, 2629 MatDiagonalScale_SeqBAIJ, 2630 MatNorm_SeqBAIJ, 2631 /* 20*/ NULL, 2632 MatAssemblyEnd_SeqBAIJ, 2633 MatSetOption_SeqBAIJ, 2634 MatZeroEntries_SeqBAIJ, 2635 /* 24*/ MatZeroRows_SeqBAIJ, 2636 NULL, 2637 NULL, 2638 NULL, 2639 NULL, 2640 /* 29*/ MatSetUp_SeqBAIJ, 2641 NULL, 2642 NULL, 2643 NULL, 2644 NULL, 2645 /* 34*/ MatDuplicate_SeqBAIJ, 2646 NULL, 2647 NULL, 2648 MatILUFactor_SeqBAIJ, 2649 NULL, 2650 /* 39*/ MatAXPY_SeqBAIJ, 2651 MatCreateSubMatrices_SeqBAIJ, 2652 MatIncreaseOverlap_SeqBAIJ, 2653 MatGetValues_SeqBAIJ, 2654 MatCopy_SeqBAIJ, 2655 /* 44*/ NULL, 2656 MatScale_SeqBAIJ, 2657 MatShift_SeqBAIJ, 2658 NULL, 2659 MatZeroRowsColumns_SeqBAIJ, 2660 /* 49*/ NULL, 2661 MatGetRowIJ_SeqBAIJ, 2662 MatRestoreRowIJ_SeqBAIJ, 2663 MatGetColumnIJ_SeqBAIJ, 2664 MatRestoreColumnIJ_SeqBAIJ, 2665 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2666 NULL, 2667 NULL, 2668 MatPermute_SeqBAIJ, 2669 MatSetValuesBlocked_SeqBAIJ, 2670 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2671 MatDestroy_SeqBAIJ, 2672 MatView_SeqBAIJ, 2673 NULL, 2674 NULL, 2675 /* 64*/ NULL, 2676 NULL, 2677 NULL, 2678 NULL, 2679 NULL, 2680 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2681 NULL, 2682 MatConvert_Basic, 2683 NULL, 2684 NULL, 2685 /* 74*/ NULL, 2686 MatFDColoringApply_BAIJ, 2687 NULL, 2688 NULL, 2689 NULL, 2690 /* 79*/ NULL, 2691 NULL, 2692 NULL, 2693 NULL, 2694 MatLoad_SeqBAIJ, 2695 /* 84*/ NULL, 2696 NULL, 2697 NULL, 2698 NULL, 2699 NULL, 2700 /* 89*/ NULL, 2701 NULL, 2702 NULL, 2703 NULL, 2704 NULL, 2705 /* 94*/ NULL, 2706 NULL, 2707 NULL, 2708 NULL, 2709 NULL, 2710 /* 99*/ NULL, 2711 NULL, 2712 NULL, 2713 MatConjugate_SeqBAIJ, 2714 NULL, 2715 /*104*/ NULL, 2716 MatRealPart_SeqBAIJ, 2717 MatImaginaryPart_SeqBAIJ, 2718 NULL, 2719 NULL, 2720 /*109*/ NULL, 2721 NULL, 2722 NULL, 2723 NULL, 2724 MatMissingDiagonal_SeqBAIJ, 2725 /*114*/ NULL, 2726 NULL, 2727 NULL, 2728 NULL, 2729 NULL, 2730 /*119*/ NULL, 2731 NULL, 2732 MatMultHermitianTranspose_SeqBAIJ, 2733 MatMultHermitianTransposeAdd_SeqBAIJ, 2734 NULL, 2735 /*124*/ NULL, 2736 NULL, 2737 MatInvertBlockDiagonal_SeqBAIJ, 2738 NULL, 2739 NULL, 2740 /*129*/ NULL, 2741 NULL, 2742 NULL, 2743 NULL, 2744 NULL, 2745 /*134*/ NULL, 2746 NULL, 2747 NULL, 2748 NULL, 2749 NULL, 2750 /*139*/ MatSetBlockSizes_Default, 2751 NULL, 2752 NULL, 2753 MatFDColoringSetUp_SeqXAIJ, 2754 NULL, 2755 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2756 MatDestroySubMatrices_SeqBAIJ 2757 }; 2758 2759 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2760 { 2761 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2762 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2763 PetscErrorCode ierr; 2764 2765 PetscFunctionBegin; 2766 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2767 2768 /* allocate space for values if not already there */ 2769 if (!aij->saved_values) { 2770 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2771 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2772 } 2773 2774 /* copy values over */ 2775 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2776 PetscFunctionReturn(0); 2777 } 2778 2779 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2780 { 2781 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2782 PetscErrorCode ierr; 2783 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2784 2785 PetscFunctionBegin; 2786 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2787 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2788 2789 /* copy values over */ 2790 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2791 PetscFunctionReturn(0); 2792 } 2793 2794 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2795 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2796 2797 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2798 { 2799 Mat_SeqBAIJ *b; 2800 PetscErrorCode ierr; 2801 PetscInt i,mbs,nbs,bs2; 2802 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2803 2804 PetscFunctionBegin; 2805 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2806 if (nz == MAT_SKIP_ALLOCATION) { 2807 skipallocation = PETSC_TRUE; 2808 nz = 0; 2809 } 2810 2811 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2812 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2813 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2814 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2815 2816 B->preallocated = PETSC_TRUE; 2817 2818 mbs = B->rmap->n/bs; 2819 nbs = B->cmap->n/bs; 2820 bs2 = bs*bs; 2821 2822 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2823 2824 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2825 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2826 if (nnz) { 2827 for (i=0; i<mbs; i++) { 2828 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2829 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2830 } 2831 } 2832 2833 b = (Mat_SeqBAIJ*)B->data; 2834 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2835 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2836 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2837 2838 if (!flg) { 2839 switch (bs) { 2840 case 1: 2841 B->ops->mult = MatMult_SeqBAIJ_1; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2843 break; 2844 case 2: 2845 B->ops->mult = MatMult_SeqBAIJ_2; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2847 break; 2848 case 3: 2849 B->ops->mult = MatMult_SeqBAIJ_3; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2851 break; 2852 case 4: 2853 B->ops->mult = MatMult_SeqBAIJ_4; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2855 break; 2856 case 5: 2857 B->ops->mult = MatMult_SeqBAIJ_5; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2859 break; 2860 case 6: 2861 B->ops->mult = MatMult_SeqBAIJ_6; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2863 break; 2864 case 7: 2865 B->ops->mult = MatMult_SeqBAIJ_7; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2867 break; 2868 case 9: 2869 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2870 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2872 #else 2873 B->ops->mult = MatMult_SeqBAIJ_N; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2875 #endif 2876 break; 2877 case 11: 2878 B->ops->mult = MatMult_SeqBAIJ_11; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2880 break; 2881 case 15: 2882 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2883 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2884 break; 2885 default: 2886 B->ops->mult = MatMult_SeqBAIJ_N; 2887 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2888 break; 2889 } 2890 } 2891 B->ops->sor = MatSOR_SeqBAIJ; 2892 b->mbs = mbs; 2893 b->nbs = nbs; 2894 if (!skipallocation) { 2895 if (!b->imax) { 2896 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2897 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2898 2899 b->free_imax_ilen = PETSC_TRUE; 2900 } 2901 /* b->ilen will count nonzeros in each block row so far. */ 2902 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2903 if (!nnz) { 2904 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2905 else if (nz < 0) nz = 1; 2906 nz = PetscMin(nz,nbs); 2907 for (i=0; i<mbs; i++) b->imax[i] = nz; 2908 ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr); 2909 } else { 2910 PetscInt64 nz64 = 0; 2911 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2912 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2913 } 2914 2915 /* allocate the matrix space */ 2916 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2917 if (B->structure_only) { 2918 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2919 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2920 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2921 } else { 2922 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2923 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2924 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2925 } 2926 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2927 2928 if (B->structure_only) { 2929 b->singlemalloc = PETSC_FALSE; 2930 b->free_a = PETSC_FALSE; 2931 } else { 2932 b->singlemalloc = PETSC_TRUE; 2933 b->free_a = PETSC_TRUE; 2934 } 2935 b->free_ij = PETSC_TRUE; 2936 2937 b->i[0] = 0; 2938 for (i=1; i<mbs+1; i++) { 2939 b->i[i] = b->i[i-1] + b->imax[i-1]; 2940 } 2941 2942 } else { 2943 b->free_a = PETSC_FALSE; 2944 b->free_ij = PETSC_FALSE; 2945 } 2946 2947 b->bs2 = bs2; 2948 b->mbs = mbs; 2949 b->nz = 0; 2950 b->maxnz = nz; 2951 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2952 B->was_assembled = PETSC_FALSE; 2953 B->assembled = PETSC_FALSE; 2954 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2955 PetscFunctionReturn(0); 2956 } 2957 2958 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2959 { 2960 PetscInt i,m,nz,nz_max=0,*nnz; 2961 PetscScalar *values=NULL; 2962 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2963 PetscErrorCode ierr; 2964 2965 PetscFunctionBegin; 2966 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2967 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2968 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2969 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2970 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2971 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2972 m = B->rmap->n/bs; 2973 2974 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2975 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2976 for (i=0; i<m; i++) { 2977 nz = ii[i+1]- ii[i]; 2978 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2979 nz_max = PetscMax(nz_max, nz); 2980 nnz[i] = nz; 2981 } 2982 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2983 ierr = PetscFree(nnz);CHKERRQ(ierr); 2984 2985 values = (PetscScalar*)V; 2986 if (!values) { 2987 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2988 } 2989 for (i=0; i<m; i++) { 2990 PetscInt ncols = ii[i+1] - ii[i]; 2991 const PetscInt *icols = jj + ii[i]; 2992 if (bs == 1 || !roworiented) { 2993 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2994 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2995 } else { 2996 PetscInt j; 2997 for (j=0; j<ncols; j++) { 2998 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2999 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3000 } 3001 } 3002 } 3003 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3004 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3007 PetscFunctionReturn(0); 3008 } 3009 3010 /*@C 3011 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3012 3013 Not Collective 3014 3015 Input Parameter: 3016 . mat - a MATSEQBAIJ matrix 3017 3018 Output Parameter: 3019 . array - pointer to the data 3020 3021 Level: intermediate 3022 3023 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3024 @*/ 3025 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3026 { 3027 PetscErrorCode ierr; 3028 3029 PetscFunctionBegin; 3030 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3031 PetscFunctionReturn(0); 3032 } 3033 3034 /*@C 3035 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3036 3037 Not Collective 3038 3039 Input Parameters: 3040 + mat - a MATSEQBAIJ matrix 3041 - array - pointer to the data 3042 3043 Level: intermediate 3044 3045 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3046 @*/ 3047 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3048 { 3049 PetscErrorCode ierr; 3050 3051 PetscFunctionBegin; 3052 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 3056 /*MC 3057 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3058 block sparse compressed row format. 3059 3060 Options Database Keys: 3061 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3062 3063 Level: beginner 3064 3065 Notes: 3066 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3067 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3068 3069 .seealso: MatCreateSeqBAIJ() 3070 M*/ 3071 3072 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3073 3074 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3075 { 3076 PetscErrorCode ierr; 3077 PetscMPIInt size; 3078 Mat_SeqBAIJ *b; 3079 3080 PetscFunctionBegin; 3081 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 3082 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3083 3084 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3085 B->data = (void*)b; 3086 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3087 3088 b->row = NULL; 3089 b->col = NULL; 3090 b->icol = NULL; 3091 b->reallocs = 0; 3092 b->saved_values = NULL; 3093 3094 b->roworiented = PETSC_TRUE; 3095 b->nonew = 0; 3096 b->diag = NULL; 3097 B->spptr = NULL; 3098 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3099 b->keepnonzeropattern = PETSC_FALSE; 3100 3101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3105 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3106 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3107 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3108 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3109 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3110 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3111 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3112 #if defined(PETSC_HAVE_HYPRE) 3113 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3114 #endif 3115 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3116 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3117 PetscFunctionReturn(0); 3118 } 3119 3120 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3121 { 3122 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3123 PetscErrorCode ierr; 3124 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3125 3126 PetscFunctionBegin; 3127 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3128 3129 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3130 c->imax = a->imax; 3131 c->ilen = a->ilen; 3132 c->free_imax_ilen = PETSC_FALSE; 3133 } else { 3134 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3135 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3136 for (i=0; i<mbs; i++) { 3137 c->imax[i] = a->imax[i]; 3138 c->ilen[i] = a->ilen[i]; 3139 } 3140 c->free_imax_ilen = PETSC_TRUE; 3141 } 3142 3143 /* allocate the matrix space */ 3144 if (mallocmatspace) { 3145 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3146 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3147 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3148 3149 c->i = a->i; 3150 c->j = a->j; 3151 c->singlemalloc = PETSC_FALSE; 3152 c->free_a = PETSC_TRUE; 3153 c->free_ij = PETSC_FALSE; 3154 c->parent = A; 3155 C->preallocated = PETSC_TRUE; 3156 C->assembled = PETSC_TRUE; 3157 3158 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3159 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3160 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3161 } else { 3162 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3163 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3164 3165 c->singlemalloc = PETSC_TRUE; 3166 c->free_a = PETSC_TRUE; 3167 c->free_ij = PETSC_TRUE; 3168 3169 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3170 if (mbs > 0) { 3171 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3172 if (cpvalues == MAT_COPY_VALUES) { 3173 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3174 } else { 3175 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3176 } 3177 } 3178 C->preallocated = PETSC_TRUE; 3179 C->assembled = PETSC_TRUE; 3180 } 3181 } 3182 3183 c->roworiented = a->roworiented; 3184 c->nonew = a->nonew; 3185 3186 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3187 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3188 3189 c->bs2 = a->bs2; 3190 c->mbs = a->mbs; 3191 c->nbs = a->nbs; 3192 3193 if (a->diag) { 3194 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3195 c->diag = a->diag; 3196 c->free_diag = PETSC_FALSE; 3197 } else { 3198 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3199 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3200 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3201 c->free_diag = PETSC_TRUE; 3202 } 3203 } else c->diag = NULL; 3204 3205 c->nz = a->nz; 3206 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3207 c->solve_work = NULL; 3208 c->mult_work = NULL; 3209 c->sor_workt = NULL; 3210 c->sor_work = NULL; 3211 3212 c->compressedrow.use = a->compressedrow.use; 3213 c->compressedrow.nrows = a->compressedrow.nrows; 3214 if (a->compressedrow.use) { 3215 i = a->compressedrow.nrows; 3216 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3217 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3218 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3219 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3220 } else { 3221 c->compressedrow.use = PETSC_FALSE; 3222 c->compressedrow.i = NULL; 3223 c->compressedrow.rindex = NULL; 3224 } 3225 C->nonzerostate = A->nonzerostate; 3226 3227 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3232 { 3233 PetscErrorCode ierr; 3234 3235 PetscFunctionBegin; 3236 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3237 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3238 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3239 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3240 PetscFunctionReturn(0); 3241 } 3242 3243 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3244 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3245 { 3246 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3247 PetscInt *rowidxs,*colidxs; 3248 PetscScalar *matvals; 3249 PetscErrorCode ierr; 3250 3251 PetscFunctionBegin; 3252 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3253 3254 /* read matrix header */ 3255 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3256 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3257 M = header[1]; N = header[2]; nz = header[3]; 3258 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3259 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3260 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3261 3262 /* set block sizes from the viewer's .info file */ 3263 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3264 /* set local and global sizes if not set already */ 3265 if (mat->rmap->n < 0) mat->rmap->n = M; 3266 if (mat->cmap->n < 0) mat->cmap->n = N; 3267 if (mat->rmap->N < 0) mat->rmap->N = M; 3268 if (mat->cmap->N < 0) mat->cmap->N = N; 3269 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3270 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3271 3272 /* check if the matrix sizes are correct */ 3273 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3274 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3275 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3276 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3277 mbs = m/bs; nbs = n/bs; 3278 3279 /* read in row lengths, column indices and nonzero values */ 3280 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3281 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3282 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3283 sum = rowidxs[m]; 3284 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3285 3286 /* read in column indices and nonzero values */ 3287 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3288 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3289 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3290 3291 { /* preallocate matrix storage */ 3292 PetscBT bt; /* helper bit set to count nonzeros */ 3293 PetscInt *nnz; 3294 PetscBool sbaij; 3295 3296 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3297 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3298 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3299 for (i=0; i<mbs; i++) { 3300 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3301 for (k=0; k<bs; k++) { 3302 PetscInt row = bs*i + k; 3303 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3304 PetscInt col = colidxs[j]; 3305 if (!sbaij || col >= row) 3306 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3307 } 3308 } 3309 } 3310 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3311 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3312 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3313 ierr = PetscFree(nnz);CHKERRQ(ierr); 3314 } 3315 3316 /* store matrix values */ 3317 for (i=0; i<m; i++) { 3318 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3319 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3320 } 3321 3322 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3323 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3324 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3325 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3326 PetscFunctionReturn(0); 3327 } 3328 3329 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3330 { 3331 PetscErrorCode ierr; 3332 PetscBool isbinary; 3333 3334 PetscFunctionBegin; 3335 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3336 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3337 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3338 PetscFunctionReturn(0); 3339 } 3340 3341 /*@C 3342 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3343 compressed row) format. For good matrix assembly performance the 3344 user should preallocate the matrix storage by setting the parameter nz 3345 (or the array nnz). By setting these parameters accurately, performance 3346 during matrix assembly can be increased by more than a factor of 50. 3347 3348 Collective 3349 3350 Input Parameters: 3351 + comm - MPI communicator, set to PETSC_COMM_SELF 3352 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3353 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3354 . m - number of rows 3355 . n - number of columns 3356 . nz - number of nonzero blocks per block row (same for all rows) 3357 - nnz - array containing the number of nonzero blocks in the various block rows 3358 (possibly different for each block row) or NULL 3359 3360 Output Parameter: 3361 . A - the matrix 3362 3363 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3364 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3365 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3366 3367 Options Database Keys: 3368 + -mat_no_unroll - uses code that does not unroll the loops in the 3369 block calculations (much slower) 3370 - -mat_block_size - size of the blocks to use 3371 3372 Level: intermediate 3373 3374 Notes: 3375 The number of rows and columns must be divisible by blocksize. 3376 3377 If the nnz parameter is given then the nz parameter is ignored 3378 3379 A nonzero block is any block that as 1 or more nonzeros in it 3380 3381 The block AIJ format is fully compatible with standard Fortran 77 3382 storage. That is, the stored row and column indices can begin at 3383 either one (as in Fortran) or zero. See the users' manual for details. 3384 3385 Specify the preallocated storage with either nz or nnz (not both). 3386 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3387 allocation. See Users-Manual: ch_mat for details. 3388 matrices. 3389 3390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3391 @*/ 3392 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3393 { 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3398 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3399 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3400 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3401 PetscFunctionReturn(0); 3402 } 3403 3404 /*@C 3405 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3406 per row in the matrix. For good matrix assembly performance the 3407 user should preallocate the matrix storage by setting the parameter nz 3408 (or the array nnz). By setting these parameters accurately, performance 3409 during matrix assembly can be increased by more than a factor of 50. 3410 3411 Collective 3412 3413 Input Parameters: 3414 + B - the matrix 3415 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3416 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3417 . nz - number of block nonzeros per block row (same for all rows) 3418 - nnz - array containing the number of block nonzeros in the various block rows 3419 (possibly different for each block row) or NULL 3420 3421 Options Database Keys: 3422 + -mat_no_unroll - uses code that does not unroll the loops in the 3423 block calculations (much slower) 3424 - -mat_block_size - size of the blocks to use 3425 3426 Level: intermediate 3427 3428 Notes: 3429 If the nnz parameter is given then the nz parameter is ignored 3430 3431 You can call MatGetInfo() to get information on how effective the preallocation was; 3432 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3433 You can also run with the option -info and look for messages with the string 3434 malloc in them to see if additional memory allocation was needed. 3435 3436 The block AIJ format is fully compatible with standard Fortran 77 3437 storage. That is, the stored row and column indices can begin at 3438 either one (as in Fortran) or zero. See the users' manual for details. 3439 3440 Specify the preallocated storage with either nz or nnz (not both). 3441 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3442 allocation. See Users-Manual: ch_mat for details. 3443 3444 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3445 @*/ 3446 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3447 { 3448 PetscErrorCode ierr; 3449 3450 PetscFunctionBegin; 3451 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3452 PetscValidType(B,1); 3453 PetscValidLogicalCollectiveInt(B,bs,2); 3454 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3455 PetscFunctionReturn(0); 3456 } 3457 3458 /*@C 3459 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3460 3461 Collective 3462 3463 Input Parameters: 3464 + B - the matrix 3465 . i - the indices into j for the start of each local row (starts with zero) 3466 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3467 - v - optional values in the matrix 3468 3469 Level: advanced 3470 3471 Notes: 3472 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3473 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3474 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3475 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3476 block column and the second index is over columns within a block. 3477 3478 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3479 3480 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3481 @*/ 3482 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3483 { 3484 PetscErrorCode ierr; 3485 3486 PetscFunctionBegin; 3487 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3488 PetscValidType(B,1); 3489 PetscValidLogicalCollectiveInt(B,bs,2); 3490 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 3495 /*@ 3496 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3497 3498 Collective 3499 3500 Input Parameters: 3501 + comm - must be an MPI communicator of size 1 3502 . bs - size of block 3503 . m - number of rows 3504 . n - number of columns 3505 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3506 . j - column indices 3507 - a - matrix values 3508 3509 Output Parameter: 3510 . mat - the matrix 3511 3512 Level: advanced 3513 3514 Notes: 3515 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3516 once the matrix is destroyed 3517 3518 You cannot set new nonzero locations into this matrix, that will generate an error. 3519 3520 The i and j indices are 0 based 3521 3522 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3523 3524 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3525 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3526 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3527 with column-major ordering within blocks. 3528 3529 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3530 3531 @*/ 3532 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3533 { 3534 PetscErrorCode ierr; 3535 PetscInt ii; 3536 Mat_SeqBAIJ *baij; 3537 3538 PetscFunctionBegin; 3539 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3540 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3541 3542 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3543 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3544 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3545 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3546 baij = (Mat_SeqBAIJ*)(*mat)->data; 3547 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3548 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3549 3550 baij->i = i; 3551 baij->j = j; 3552 baij->a = a; 3553 3554 baij->singlemalloc = PETSC_FALSE; 3555 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3556 baij->free_a = PETSC_FALSE; 3557 baij->free_ij = PETSC_FALSE; 3558 3559 for (ii=0; ii<m; ii++) { 3560 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3561 if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3562 } 3563 if (PetscDefined(USE_DEBUG)) { 3564 for (ii=0; ii<baij->i[m]; ii++) { 3565 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3566 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3567 } 3568 } 3569 3570 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3571 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3572 PetscFunctionReturn(0); 3573 } 3574 3575 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3576 { 3577 PetscErrorCode ierr; 3578 PetscMPIInt size; 3579 3580 PetscFunctionBegin; 3581 ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3582 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3583 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3584 } else { 3585 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3586 } 3587 PetscFunctionReturn(0); 3588 } 3589