1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 953 } 954 if (N >= i) { 955 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 979 { 980 Mat A = *AA; 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 982 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 983 PetscInt *ai=a->i,*ailen=a->ilen; 984 PetscInt *aj=a->j,brow,bcol; 985 PetscInt ridx,cidx,lastcol = -1; 986 MatScalar *ap,value,*aa=a->a,*bap; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 for (k=0; k<m; k++) { /* loop over added rows */ 991 row = im[k]; brow = row/4; 992 rp = aj + ai[brow]; 993 ap = aa + 16*ai[brow]; 994 nrow = ailen[brow]; 995 low = 0; 996 high = nrow; 997 for (l=0; l<n; l++) { /* loop over added columns */ 998 col = in[l]; bcol = col/4; 999 ridx = row % 4; cidx = col % 4; 1000 value = v[l + k*n]; 1001 if (col <= lastcol) low = 0; 1002 else high = nrow; 1003 lastcol = col; 1004 while (high-low > 7) { 1005 t = (low+high)/2; 1006 if (rp[t] > bcol) high = t; 1007 else low = t; 1008 } 1009 for (i=low; i<high; i++) { 1010 if (rp[i] > bcol) break; 1011 if (rp[i] == bcol) { 1012 bap = ap + 16*i + 4*cidx + ridx; 1013 *bap += value; 1014 goto noinsert1; 1015 } 1016 } 1017 N = nrow++ - 1; 1018 high++; /* added new column thus must search to one higher than before */ 1019 /* shift up all the later entries in this row */ 1020 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1021 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1022 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt *diag,*ii = a->i,i; 1041 1042 PetscFunctionBegin; 1043 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1044 *missing = PETSC_FALSE; 1045 if (A->rmap->n > 0 && !ii) { 1046 *missing = PETSC_TRUE; 1047 if (d) *d = 0; 1048 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1049 } else { 1050 diag = a->diag; 1051 for (i=0; i<a->mbs; i++) { 1052 if (diag[i] >= ii[i+1]) { 1053 *missing = PETSC_TRUE; 1054 if (d) *d = i; 1055 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1056 break; 1057 } 1058 } 1059 } 1060 PetscFunctionReturn(0); 1061 } 1062 1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1064 { 1065 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1066 PetscErrorCode ierr; 1067 PetscInt i,j,m = a->mbs; 1068 1069 PetscFunctionBegin; 1070 if (!a->diag) { 1071 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1072 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1073 a->free_diag = PETSC_TRUE; 1074 } 1075 for (i=0; i<m; i++) { 1076 a->diag[i] = a->i[i+1]; 1077 for (j=a->i[i]; j<a->i[i+1]; j++) { 1078 if (a->j[j] == i) { 1079 a->diag[i] = j; 1080 break; 1081 } 1082 } 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 1088 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1089 { 1090 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1093 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1094 1095 PetscFunctionBegin; 1096 *nn = n; 1097 if (!ia) PetscFunctionReturn(0); 1098 if (symmetric) { 1099 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1100 nz = tia[n]; 1101 } else { 1102 tia = a->i; tja = a->j; 1103 } 1104 1105 if (!blockcompressed && bs > 1) { 1106 (*nn) *= bs; 1107 /* malloc & create the natural set of indices */ 1108 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1109 if (n) { 1110 (*ia)[0] = oshift; 1111 for (j=1; j<bs; j++) { 1112 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1113 } 1114 } 1115 1116 for (i=1; i<n; i++) { 1117 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1118 for (j=1; j<bs; j++) { 1119 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1120 } 1121 } 1122 if (n) { 1123 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1124 } 1125 1126 if (inja) { 1127 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1128 cnt = 0; 1129 for (i=0; i<n; i++) { 1130 for (j=0; j<bs; j++) { 1131 for (k=tia[i]; k<tia[i+1]; k++) { 1132 for (l=0; l<bs; l++) { 1133 (*ja)[cnt++] = bs*tja[k] + l; 1134 } 1135 } 1136 } 1137 } 1138 } 1139 1140 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1141 ierr = PetscFree(tia);CHKERRQ(ierr); 1142 ierr = PetscFree(tja);CHKERRQ(ierr); 1143 } 1144 } else if (oshift == 1) { 1145 if (symmetric) { 1146 nz = tia[A->rmap->n/bs]; 1147 /* add 1 to i and j indices */ 1148 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1149 *ia = tia; 1150 if (ja) { 1151 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1152 *ja = tja; 1153 } 1154 } else { 1155 nz = a->i[A->rmap->n/bs]; 1156 /* malloc space and add 1 to i and j indices */ 1157 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1158 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1159 if (ja) { 1160 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1161 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1162 } 1163 } 1164 } else { 1165 *ia = tia; 1166 if (ja) *ja = tja; 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1172 { 1173 PetscErrorCode ierr; 1174 1175 PetscFunctionBegin; 1176 if (!ia) PetscFunctionReturn(0); 1177 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1178 ierr = PetscFree(*ia);CHKERRQ(ierr); 1179 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1180 } 1181 PetscFunctionReturn(0); 1182 } 1183 1184 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1185 { 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1187 PetscErrorCode ierr; 1188 1189 PetscFunctionBegin; 1190 #if defined(PETSC_USE_LOG) 1191 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1192 #endif 1193 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1194 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1195 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1196 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1197 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1198 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1200 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1201 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1202 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1203 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1204 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1205 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1206 1207 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1208 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1209 ierr = PetscFree(A->data);CHKERRQ(ierr); 1210 1211 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1222 #if defined(PETSC_HAVE_HYPRE) 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1224 #endif 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1226 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1231 { 1232 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 switch (op) { 1237 case MAT_ROW_ORIENTED: 1238 a->roworiented = flg; 1239 break; 1240 case MAT_KEEP_NONZERO_PATTERN: 1241 a->keepnonzeropattern = flg; 1242 break; 1243 case MAT_NEW_NONZERO_LOCATIONS: 1244 a->nonew = (flg ? 0 : 1); 1245 break; 1246 case MAT_NEW_NONZERO_LOCATION_ERR: 1247 a->nonew = (flg ? -1 : 0); 1248 break; 1249 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1250 a->nonew = (flg ? -2 : 0); 1251 break; 1252 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1253 a->nounused = (flg ? -1 : 0); 1254 break; 1255 case MAT_NEW_DIAGONALS: 1256 case MAT_IGNORE_OFF_PROC_ENTRIES: 1257 case MAT_USE_HASH_TABLE: 1258 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1259 break; 1260 case MAT_SPD: 1261 case MAT_SYMMETRIC: 1262 case MAT_STRUCTURALLY_SYMMETRIC: 1263 case MAT_HERMITIAN: 1264 case MAT_SYMMETRY_ETERNAL: 1265 case MAT_SUBMAT_SINGLEIS: 1266 case MAT_STRUCTURE_ONLY: 1267 /* These options are handled directly by MatSetOption() */ 1268 break; 1269 default: 1270 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1271 } 1272 PetscFunctionReturn(0); 1273 } 1274 1275 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1276 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1277 { 1278 PetscErrorCode ierr; 1279 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1280 MatScalar *aa_i; 1281 PetscScalar *v_i; 1282 1283 PetscFunctionBegin; 1284 bs = A->rmap->bs; 1285 bs2 = bs*bs; 1286 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1287 1288 bn = row/bs; /* Block number */ 1289 bp = row % bs; /* Block Position */ 1290 M = ai[bn+1] - ai[bn]; 1291 *nz = bs*M; 1292 1293 if (v) { 1294 *v = 0; 1295 if (*nz) { 1296 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1297 for (i=0; i<M; i++) { /* for each block in the block row */ 1298 v_i = *v + i*bs; 1299 aa_i = aa + bs2*(ai[bn] + i); 1300 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1301 } 1302 } 1303 } 1304 1305 if (idx) { 1306 *idx = 0; 1307 if (*nz) { 1308 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1309 for (i=0; i<M; i++) { /* for each block in the block row */ 1310 idx_i = *idx + i*bs; 1311 itmp = bs*aj[ai[bn] + i]; 1312 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1313 } 1314 } 1315 } 1316 PetscFunctionReturn(0); 1317 } 1318 1319 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1320 { 1321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1326 PetscFunctionReturn(0); 1327 } 1328 1329 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1330 { 1331 PetscErrorCode ierr; 1332 1333 PetscFunctionBegin; 1334 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1335 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1336 PetscFunctionReturn(0); 1337 } 1338 1339 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1340 { 1341 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1342 Mat C; 1343 PetscErrorCode ierr; 1344 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1345 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1346 MatScalar *ata,*aa=a->a; 1347 1348 PetscFunctionBegin; 1349 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1350 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1351 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1352 1353 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1354 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1355 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1356 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1357 1358 at = (Mat_SeqBAIJ*)C->data; 1359 ati = at->i; 1360 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1361 } else { 1362 C = *B; 1363 at = (Mat_SeqBAIJ*)C->data; 1364 ati = at->i; 1365 } 1366 1367 atj = at->j; 1368 ata = at->a; 1369 1370 /* Copy ati into atfill so we have locations of the next free space in atj */ 1371 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1372 1373 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1374 for (i=0; i<mbs; i++) { 1375 anzj = ai[i+1] - ai[i]; 1376 for (j=0; j<anzj; j++) { 1377 atj[atfill[*aj]] = i; 1378 for (kr=0; kr<bs; kr++) { 1379 for (k=0; k<bs; k++) { 1380 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1381 } 1382 } 1383 atfill[*aj++] += 1; 1384 } 1385 } 1386 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1387 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1388 1389 /* Clean up temporary space and complete requests. */ 1390 ierr = PetscFree(atfill);CHKERRQ(ierr); 1391 1392 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1393 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1394 *B = C; 1395 } else { 1396 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1397 } 1398 PetscFunctionReturn(0); 1399 } 1400 1401 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1402 { 1403 PetscErrorCode ierr; 1404 Mat Btrans; 1405 1406 PetscFunctionBegin; 1407 *f = PETSC_FALSE; 1408 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1409 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1410 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 1414 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1415 { 1416 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1417 PetscErrorCode ierr; 1418 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1419 int fd; 1420 PetscScalar *aa; 1421 FILE *file; 1422 1423 PetscFunctionBegin; 1424 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1425 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1426 col_lens[0] = MAT_FILE_CLASSID; 1427 1428 col_lens[1] = A->rmap->N; 1429 col_lens[2] = A->cmap->n; 1430 col_lens[3] = a->nz*bs2; 1431 1432 /* store lengths of each row and write (including header) to file */ 1433 count = 0; 1434 for (i=0; i<a->mbs; i++) { 1435 for (j=0; j<bs; j++) { 1436 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1437 } 1438 } 1439 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1440 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1441 1442 /* store column indices (zero start index) */ 1443 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1444 count = 0; 1445 for (i=0; i<a->mbs; i++) { 1446 for (j=0; j<bs; j++) { 1447 for (k=a->i[i]; k<a->i[i+1]; k++) { 1448 for (l=0; l<bs; l++) { 1449 jj[count++] = bs*a->j[k] + l; 1450 } 1451 } 1452 } 1453 } 1454 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1455 ierr = PetscFree(jj);CHKERRQ(ierr); 1456 1457 /* store nonzero values */ 1458 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1459 count = 0; 1460 for (i=0; i<a->mbs; i++) { 1461 for (j=0; j<bs; j++) { 1462 for (k=a->i[i]; k<a->i[i+1]; k++) { 1463 for (l=0; l<bs; l++) { 1464 aa[count++] = a->a[bs2*k + l*bs + j]; 1465 } 1466 } 1467 } 1468 } 1469 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1470 ierr = PetscFree(aa);CHKERRQ(ierr); 1471 1472 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1473 if (file) { 1474 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1475 } 1476 PetscFunctionReturn(0); 1477 } 1478 1479 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1480 { 1481 PetscErrorCode ierr; 1482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1483 PetscInt i,bs = A->rmap->bs,k; 1484 1485 PetscFunctionBegin; 1486 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1487 for (i=0; i<a->mbs; i++) { 1488 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1489 for (k=a->i[i]; k<a->i[i+1]; k++) { 1490 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1491 } 1492 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1493 } 1494 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1495 PetscFunctionReturn(0); 1496 } 1497 1498 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1499 { 1500 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1501 PetscErrorCode ierr; 1502 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1503 PetscViewerFormat format; 1504 1505 PetscFunctionBegin; 1506 if (A->structure_only) { 1507 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1508 PetscFunctionReturn(0); 1509 } 1510 1511 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1512 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1513 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1514 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1515 const char *matname; 1516 Mat aij; 1517 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1518 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1519 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1520 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1521 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1522 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1523 PetscFunctionReturn(0); 1524 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1525 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1526 for (i=0; i<a->mbs; i++) { 1527 for (j=0; j<bs; j++) { 1528 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1529 for (k=a->i[i]; k<a->i[i+1]; k++) { 1530 for (l=0; l<bs; l++) { 1531 #if defined(PETSC_USE_COMPLEX) 1532 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1534 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1537 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1540 } 1541 #else 1542 if (a->a[bs2*k + l*bs + j] != 0.0) { 1543 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1544 } 1545 #endif 1546 } 1547 } 1548 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1549 } 1550 } 1551 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1552 } else { 1553 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1554 for (i=0; i<a->mbs; i++) { 1555 for (j=0; j<bs; j++) { 1556 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1557 for (k=a->i[i]; k<a->i[i+1]; k++) { 1558 for (l=0; l<bs; l++) { 1559 #if defined(PETSC_USE_COMPLEX) 1560 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1562 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1565 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } else { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1568 } 1569 #else 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1571 #endif 1572 } 1573 } 1574 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1575 } 1576 } 1577 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1578 } 1579 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1580 PetscFunctionReturn(0); 1581 } 1582 1583 #include <petscdraw.h> 1584 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1585 { 1586 Mat A = (Mat) Aa; 1587 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1588 PetscErrorCode ierr; 1589 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1590 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1591 MatScalar *aa; 1592 PetscViewer viewer; 1593 PetscViewerFormat format; 1594 1595 PetscFunctionBegin; 1596 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1597 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1598 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1599 1600 /* loop over matrix elements drawing boxes */ 1601 1602 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1603 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1604 /* Blue for negative, Cyan for zero and Red for positive */ 1605 color = PETSC_DRAW_BLUE; 1606 for (i=0,row=0; i<mbs; i++,row+=bs) { 1607 for (j=a->i[i]; j<a->i[i+1]; j++) { 1608 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1609 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1610 aa = a->a + j*bs2; 1611 for (k=0; k<bs; k++) { 1612 for (l=0; l<bs; l++) { 1613 if (PetscRealPart(*aa++) >= 0.) continue; 1614 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1615 } 1616 } 1617 } 1618 } 1619 color = PETSC_DRAW_CYAN; 1620 for (i=0,row=0; i<mbs; i++,row+=bs) { 1621 for (j=a->i[i]; j<a->i[i+1]; j++) { 1622 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1623 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1624 aa = a->a + j*bs2; 1625 for (k=0; k<bs; k++) { 1626 for (l=0; l<bs; l++) { 1627 if (PetscRealPart(*aa++) != 0.) continue; 1628 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1629 } 1630 } 1631 } 1632 } 1633 color = PETSC_DRAW_RED; 1634 for (i=0,row=0; i<mbs; i++,row+=bs) { 1635 for (j=a->i[i]; j<a->i[i+1]; j++) { 1636 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1637 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1638 aa = a->a + j*bs2; 1639 for (k=0; k<bs; k++) { 1640 for (l=0; l<bs; l++) { 1641 if (PetscRealPart(*aa++) <= 0.) continue; 1642 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1643 } 1644 } 1645 } 1646 } 1647 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1648 } else { 1649 /* use contour shading to indicate magnitude of values */ 1650 /* first determine max of all nonzero values */ 1651 PetscReal minv = 0.0, maxv = 0.0; 1652 PetscDraw popup; 1653 1654 for (i=0; i<a->nz*a->bs2; i++) { 1655 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1656 } 1657 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1658 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1659 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1660 1661 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1662 for (i=0,row=0; i<mbs; i++,row+=bs) { 1663 for (j=a->i[i]; j<a->i[i+1]; j++) { 1664 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1665 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1666 aa = a->a + j*bs2; 1667 for (k=0; k<bs; k++) { 1668 for (l=0; l<bs; l++) { 1669 MatScalar v = *aa++; 1670 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1671 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1672 } 1673 } 1674 } 1675 } 1676 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1682 { 1683 PetscErrorCode ierr; 1684 PetscReal xl,yl,xr,yr,w,h; 1685 PetscDraw draw; 1686 PetscBool isnull; 1687 1688 PetscFunctionBegin; 1689 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1690 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1691 if (isnull) PetscFunctionReturn(0); 1692 1693 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1694 xr += w; yr += h; xl = -w; yl = -h; 1695 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1697 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1698 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1699 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1700 PetscFunctionReturn(0); 1701 } 1702 1703 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1704 { 1705 PetscErrorCode ierr; 1706 PetscBool iascii,isbinary,isdraw; 1707 1708 PetscFunctionBegin; 1709 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1710 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1711 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1712 if (iascii) { 1713 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1714 } else if (isbinary) { 1715 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1716 } else if (isdraw) { 1717 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1718 } else { 1719 Mat B; 1720 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1721 ierr = MatView(B,viewer);CHKERRQ(ierr); 1722 ierr = MatDestroy(&B);CHKERRQ(ierr); 1723 } 1724 PetscFunctionReturn(0); 1725 } 1726 1727 1728 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1729 { 1730 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1731 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1732 PetscInt *ai = a->i,*ailen = a->ilen; 1733 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1734 MatScalar *ap,*aa = a->a; 1735 1736 PetscFunctionBegin; 1737 for (k=0; k<m; k++) { /* loop over rows */ 1738 row = im[k]; brow = row/bs; 1739 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1740 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1741 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1742 nrow = ailen[brow]; 1743 for (l=0; l<n; l++) { /* loop over columns */ 1744 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1745 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1746 col = in[l]; 1747 bcol = col/bs; 1748 cidx = col%bs; 1749 ridx = row%bs; 1750 high = nrow; 1751 low = 0; /* assume unsorted */ 1752 while (high-low > 5) { 1753 t = (low+high)/2; 1754 if (rp[t] > bcol) high = t; 1755 else low = t; 1756 } 1757 for (i=low; i<high; i++) { 1758 if (rp[i] > bcol) break; 1759 if (rp[i] == bcol) { 1760 *v++ = ap[bs2*i+bs*cidx+ridx]; 1761 goto finished; 1762 } 1763 } 1764 *v++ = 0.0; 1765 finished:; 1766 } 1767 } 1768 PetscFunctionReturn(0); 1769 } 1770 1771 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1772 { 1773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1774 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1775 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1776 PetscErrorCode ierr; 1777 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1778 PetscBool roworiented=a->roworiented; 1779 const PetscScalar *value = v; 1780 MatScalar *ap=NULL,*aa = a->a,*bap; 1781 1782 PetscFunctionBegin; 1783 if (roworiented) { 1784 stepval = (n-1)*bs; 1785 } else { 1786 stepval = (m-1)*bs; 1787 } 1788 for (k=0; k<m; k++) { /* loop over added rows */ 1789 row = im[k]; 1790 if (row < 0) continue; 1791 #if defined(PETSC_USE_DEBUG) 1792 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1793 #endif 1794 rp = aj + ai[row]; 1795 if (!A->structure_only) ap = aa + bs2*ai[row]; 1796 rmax = imax[row]; 1797 nrow = ailen[row]; 1798 low = 0; 1799 high = nrow; 1800 for (l=0; l<n; l++) { /* loop over added columns */ 1801 if (in[l] < 0) continue; 1802 #if defined(PETSC_USE_DEBUG) 1803 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1804 #endif 1805 col = in[l]; 1806 if (!A->structure_only) { 1807 if (roworiented) { 1808 value = v + (k*(stepval+bs) + l)*bs; 1809 } else { 1810 value = v + (l*(stepval+bs) + k)*bs; 1811 } 1812 } 1813 if (col <= lastcol) low = 0; 1814 else high = nrow; 1815 lastcol = col; 1816 while (high-low > 7) { 1817 t = (low+high)/2; 1818 if (rp[t] > col) high = t; 1819 else low = t; 1820 } 1821 for (i=low; i<high; i++) { 1822 if (rp[i] > col) break; 1823 if (rp[i] == col) { 1824 if (A->structure_only) goto noinsert2; 1825 bap = ap + bs2*i; 1826 if (roworiented) { 1827 if (is == ADD_VALUES) { 1828 for (ii=0; ii<bs; ii++,value+=stepval) { 1829 for (jj=ii; jj<bs2; jj+=bs) { 1830 bap[jj] += *value++; 1831 } 1832 } 1833 } else { 1834 for (ii=0; ii<bs; ii++,value+=stepval) { 1835 for (jj=ii; jj<bs2; jj+=bs) { 1836 bap[jj] = *value++; 1837 } 1838 } 1839 } 1840 } else { 1841 if (is == ADD_VALUES) { 1842 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1843 for (jj=0; jj<bs; jj++) { 1844 bap[jj] += value[jj]; 1845 } 1846 bap += bs; 1847 } 1848 } else { 1849 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1850 for (jj=0; jj<bs; jj++) { 1851 bap[jj] = value[jj]; 1852 } 1853 bap += bs; 1854 } 1855 } 1856 } 1857 goto noinsert2; 1858 } 1859 } 1860 if (nonew == 1) goto noinsert2; 1861 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1862 if (A->structure_only) { 1863 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1864 } else { 1865 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1866 } 1867 N = nrow++ - 1; high++; 1868 /* shift up all the later entries in this row */ 1869 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1870 rp[i] = col; 1871 if (!A->structure_only) { 1872 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1873 bap = ap + bs2*i; 1874 if (roworiented) { 1875 for (ii=0; ii<bs; ii++,value+=stepval) { 1876 for (jj=ii; jj<bs2; jj+=bs) { 1877 bap[jj] = *value++; 1878 } 1879 } 1880 } else { 1881 for (ii=0; ii<bs; ii++,value+=stepval) { 1882 for (jj=0; jj<bs; jj++) { 1883 *bap++ = *value++; 1884 } 1885 } 1886 } 1887 } 1888 noinsert2:; 1889 low = i; 1890 } 1891 ailen[row] = nrow; 1892 } 1893 PetscFunctionReturn(0); 1894 } 1895 1896 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1897 { 1898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1899 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1900 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1901 PetscErrorCode ierr; 1902 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1903 MatScalar *aa = a->a,*ap; 1904 PetscReal ratio=0.6; 1905 1906 PetscFunctionBegin; 1907 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1908 1909 if (m) rmax = ailen[0]; 1910 for (i=1; i<mbs; i++) { 1911 /* move each row back by the amount of empty slots (fshift) before it*/ 1912 fshift += imax[i-1] - ailen[i-1]; 1913 rmax = PetscMax(rmax,ailen[i]); 1914 if (fshift) { 1915 ip = aj + ai[i]; 1916 ap = aa + bs2*ai[i]; 1917 N = ailen[i]; 1918 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1919 if (!A->structure_only) { 1920 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1921 } 1922 } 1923 ai[i] = ai[i-1] + ailen[i-1]; 1924 } 1925 if (mbs) { 1926 fshift += imax[mbs-1] - ailen[mbs-1]; 1927 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1928 } 1929 1930 /* reset ilen and imax for each row */ 1931 a->nonzerorowcnt = 0; 1932 if (A->structure_only) { 1933 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1934 } else { /* !A->structure_only */ 1935 for (i=0; i<mbs; i++) { 1936 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1937 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1938 } 1939 } 1940 a->nz = ai[mbs]; 1941 1942 /* diagonals may have moved, so kill the diagonal pointers */ 1943 a->idiagvalid = PETSC_FALSE; 1944 if (fshift && a->diag) { 1945 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1946 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1947 a->diag = 0; 1948 } 1949 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1950 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1953 1954 A->info.mallocs += a->reallocs; 1955 a->reallocs = 0; 1956 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1957 a->rmax = rmax; 1958 1959 if (!A->structure_only) { 1960 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1961 } 1962 PetscFunctionReturn(0); 1963 } 1964 1965 /* 1966 This function returns an array of flags which indicate the locations of contiguous 1967 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1968 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1969 Assume: sizes should be long enough to hold all the values. 1970 */ 1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1972 { 1973 PetscInt i,j,k,row; 1974 PetscBool flg; 1975 1976 PetscFunctionBegin; 1977 for (i=0,j=0; i<n; j++) { 1978 row = idx[i]; 1979 if (row%bs!=0) { /* Not the begining of a block */ 1980 sizes[j] = 1; 1981 i++; 1982 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1983 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1984 i++; 1985 } else { /* Begining of the block, so check if the complete block exists */ 1986 flg = PETSC_TRUE; 1987 for (k=1; k<bs; k++) { 1988 if (row+k != idx[i+k]) { /* break in the block */ 1989 flg = PETSC_FALSE; 1990 break; 1991 } 1992 } 1993 if (flg) { /* No break in the bs */ 1994 sizes[j] = bs; 1995 i += bs; 1996 } else { 1997 sizes[j] = 1; 1998 i++; 1999 } 2000 } 2001 } 2002 *bs_max = j; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2007 { 2008 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2009 PetscErrorCode ierr; 2010 PetscInt i,j,k,count,*rows; 2011 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2012 PetscScalar zero = 0.0; 2013 MatScalar *aa; 2014 const PetscScalar *xx; 2015 PetscScalar *bb; 2016 2017 PetscFunctionBegin; 2018 /* fix right hand side if needed */ 2019 if (x && b) { 2020 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2021 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2022 for (i=0; i<is_n; i++) { 2023 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2024 } 2025 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2026 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2027 } 2028 2029 /* Make a copy of the IS and sort it */ 2030 /* allocate memory for rows,sizes */ 2031 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2032 2033 /* copy IS values to rows, and sort them */ 2034 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2035 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2036 2037 if (baij->keepnonzeropattern) { 2038 for (i=0; i<is_n; i++) sizes[i] = 1; 2039 bs_max = is_n; 2040 } else { 2041 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2042 A->nonzerostate++; 2043 } 2044 2045 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2046 row = rows[j]; 2047 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2048 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2049 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2050 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2051 if (diag != (PetscScalar)0.0) { 2052 if (baij->ilen[row/bs] > 0) { 2053 baij->ilen[row/bs] = 1; 2054 baij->j[baij->i[row/bs]] = row/bs; 2055 2056 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2057 } 2058 /* Now insert all the diagonal values for this bs */ 2059 for (k=0; k<bs; k++) { 2060 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2061 } 2062 } else { /* (diag == 0.0) */ 2063 baij->ilen[row/bs] = 0; 2064 } /* end (diag == 0.0) */ 2065 } else { /* (sizes[i] != bs) */ 2066 #if defined(PETSC_USE_DEBUG) 2067 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2068 #endif 2069 for (k=0; k<count; k++) { 2070 aa[0] = zero; 2071 aa += bs; 2072 } 2073 if (diag != (PetscScalar)0.0) { 2074 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2075 } 2076 } 2077 } 2078 2079 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2085 { 2086 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2087 PetscErrorCode ierr; 2088 PetscInt i,j,k,count; 2089 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2090 PetscScalar zero = 0.0; 2091 MatScalar *aa; 2092 const PetscScalar *xx; 2093 PetscScalar *bb; 2094 PetscBool *zeroed,vecs = PETSC_FALSE; 2095 2096 PetscFunctionBegin; 2097 /* fix right hand side if needed */ 2098 if (x && b) { 2099 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2100 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2101 vecs = PETSC_TRUE; 2102 } 2103 2104 /* zero the columns */ 2105 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2106 for (i=0; i<is_n; i++) { 2107 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2108 zeroed[is_idx[i]] = PETSC_TRUE; 2109 } 2110 for (i=0; i<A->rmap->N; i++) { 2111 if (!zeroed[i]) { 2112 row = i/bs; 2113 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2114 for (k=0; k<bs; k++) { 2115 col = bs*baij->j[j] + k; 2116 if (zeroed[col]) { 2117 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2118 if (vecs) bb[i] -= aa[0]*xx[col]; 2119 aa[0] = 0.0; 2120 } 2121 } 2122 } 2123 } else if (vecs) bb[i] = diag*xx[i]; 2124 } 2125 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2126 if (vecs) { 2127 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2128 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2129 } 2130 2131 /* zero the rows */ 2132 for (i=0; i<is_n; i++) { 2133 row = is_idx[i]; 2134 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2135 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2136 for (k=0; k<count; k++) { 2137 aa[0] = zero; 2138 aa += bs; 2139 } 2140 if (diag != (PetscScalar)0.0) { 2141 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2142 } 2143 } 2144 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2149 { 2150 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2151 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2152 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2153 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2154 PetscErrorCode ierr; 2155 PetscInt ridx,cidx,bs2=a->bs2; 2156 PetscBool roworiented=a->roworiented; 2157 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2158 2159 PetscFunctionBegin; 2160 for (k=0; k<m; k++) { /* loop over added rows */ 2161 row = im[k]; 2162 brow = row/bs; 2163 if (row < 0) continue; 2164 #if defined(PETSC_USE_DEBUG) 2165 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2166 #endif 2167 rp = aj + ai[brow]; 2168 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2169 rmax = imax[brow]; 2170 nrow = ailen[brow]; 2171 low = 0; 2172 high = nrow; 2173 for (l=0; l<n; l++) { /* loop over added columns */ 2174 if (in[l] < 0) continue; 2175 #if defined(PETSC_USE_DEBUG) 2176 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2177 #endif 2178 col = in[l]; bcol = col/bs; 2179 ridx = row % bs; cidx = col % bs; 2180 if (!A->structure_only) { 2181 if (roworiented) { 2182 value = v[l + k*n]; 2183 } else { 2184 value = v[k + l*m]; 2185 } 2186 } 2187 if (col <= lastcol) low = 0; else high = nrow; 2188 lastcol = col; 2189 while (high-low > 7) { 2190 t = (low+high)/2; 2191 if (rp[t] > bcol) high = t; 2192 else low = t; 2193 } 2194 for (i=low; i<high; i++) { 2195 if (rp[i] > bcol) break; 2196 if (rp[i] == bcol) { 2197 bap = ap + bs2*i + bs*cidx + ridx; 2198 if (!A->structure_only) { 2199 if (is == ADD_VALUES) *bap += value; 2200 else *bap = value; 2201 } 2202 goto noinsert1; 2203 } 2204 } 2205 if (nonew == 1) goto noinsert1; 2206 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2207 if (A->structure_only) { 2208 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2209 } else { 2210 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2211 } 2212 N = nrow++ - 1; high++; 2213 /* shift up all the later entries in this row */ 2214 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2215 rp[i] = bcol; 2216 if (!A->structure_only) { 2217 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2218 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2219 ap[bs2*i + bs*cidx + ridx] = value; 2220 } 2221 a->nz++; 2222 A->nonzerostate++; 2223 noinsert1:; 2224 low = i; 2225 } 2226 ailen[brow] = nrow; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2232 { 2233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2234 Mat outA; 2235 PetscErrorCode ierr; 2236 PetscBool row_identity,col_identity; 2237 2238 PetscFunctionBegin; 2239 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2240 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2241 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2242 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2243 2244 outA = inA; 2245 inA->factortype = MAT_FACTOR_LU; 2246 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2247 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2248 2249 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2250 2251 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2252 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2253 a->row = row; 2254 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2255 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2256 a->col = col; 2257 2258 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2259 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2260 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2261 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2262 2263 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2264 if (!a->solve_work) { 2265 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2266 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2267 } 2268 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2269 PetscFunctionReturn(0); 2270 } 2271 2272 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2273 { 2274 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2275 PetscInt i,nz,mbs; 2276 2277 PetscFunctionBegin; 2278 nz = baij->maxnz; 2279 mbs = baij->mbs; 2280 for (i=0; i<nz; i++) { 2281 baij->j[i] = indices[i]; 2282 } 2283 baij->nz = nz; 2284 for (i=0; i<mbs; i++) { 2285 baij->ilen[i] = baij->imax[i]; 2286 } 2287 PetscFunctionReturn(0); 2288 } 2289 2290 /*@ 2291 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2292 in the matrix. 2293 2294 Input Parameters: 2295 + mat - the SeqBAIJ matrix 2296 - indices - the column indices 2297 2298 Level: advanced 2299 2300 Notes: 2301 This can be called if you have precomputed the nonzero structure of the 2302 matrix and want to provide it to the matrix object to improve the performance 2303 of the MatSetValues() operation. 2304 2305 You MUST have set the correct numbers of nonzeros per row in the call to 2306 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2307 2308 MUST be called before any calls to MatSetValues(); 2309 2310 @*/ 2311 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2312 { 2313 PetscErrorCode ierr; 2314 2315 PetscFunctionBegin; 2316 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2317 PetscValidPointer(indices,2); 2318 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2323 { 2324 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2325 PetscErrorCode ierr; 2326 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2327 PetscReal atmp; 2328 PetscScalar *x,zero = 0.0; 2329 MatScalar *aa; 2330 PetscInt ncols,brow,krow,kcol; 2331 2332 PetscFunctionBegin; 2333 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2334 bs = A->rmap->bs; 2335 aa = a->a; 2336 ai = a->i; 2337 aj = a->j; 2338 mbs = a->mbs; 2339 2340 ierr = VecSet(v,zero);CHKERRQ(ierr); 2341 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2342 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2343 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2344 for (i=0; i<mbs; i++) { 2345 ncols = ai[1] - ai[0]; ai++; 2346 brow = bs*i; 2347 for (j=0; j<ncols; j++) { 2348 for (kcol=0; kcol<bs; kcol++) { 2349 for (krow=0; krow<bs; krow++) { 2350 atmp = PetscAbsScalar(*aa);aa++; 2351 row = brow + krow; /* row index */ 2352 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2353 } 2354 } 2355 aj++; 2356 } 2357 } 2358 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2359 PetscFunctionReturn(0); 2360 } 2361 2362 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2363 { 2364 PetscErrorCode ierr; 2365 2366 PetscFunctionBegin; 2367 /* If the two matrices have the same copy implementation, use fast copy. */ 2368 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2369 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2370 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2371 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2372 2373 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2374 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2375 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2376 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2377 } else { 2378 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2379 } 2380 PetscFunctionReturn(0); 2381 } 2382 2383 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2384 { 2385 PetscErrorCode ierr; 2386 2387 PetscFunctionBegin; 2388 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2389 PetscFunctionReturn(0); 2390 } 2391 2392 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2393 { 2394 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2395 2396 PetscFunctionBegin; 2397 *array = a->a; 2398 PetscFunctionReturn(0); 2399 } 2400 2401 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2402 { 2403 PetscFunctionBegin; 2404 PetscFunctionReturn(0); 2405 } 2406 2407 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2408 { 2409 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2410 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2411 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2412 PetscErrorCode ierr; 2413 2414 PetscFunctionBegin; 2415 /* Set the number of nonzeros in the new matrix */ 2416 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2421 { 2422 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2423 PetscErrorCode ierr; 2424 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2425 PetscBLASInt one=1; 2426 2427 PetscFunctionBegin; 2428 if (str == SAME_NONZERO_PATTERN) { 2429 PetscScalar alpha = a; 2430 PetscBLASInt bnz; 2431 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2432 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2433 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2434 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2435 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2436 } else { 2437 Mat B; 2438 PetscInt *nnz; 2439 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2440 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2441 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2442 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2443 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2444 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2445 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2446 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2447 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2448 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2449 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2450 ierr = PetscFree(nnz);CHKERRQ(ierr); 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2456 { 2457 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2458 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2459 MatScalar *aa = a->a; 2460 2461 PetscFunctionBegin; 2462 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2467 { 2468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2469 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2470 MatScalar *aa = a->a; 2471 2472 PetscFunctionBegin; 2473 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2474 PetscFunctionReturn(0); 2475 } 2476 2477 /* 2478 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2479 */ 2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2483 PetscErrorCode ierr; 2484 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2485 PetscInt nz = a->i[m],row,*jj,mr,col; 2486 2487 PetscFunctionBegin; 2488 *nn = n; 2489 if (!ia) PetscFunctionReturn(0); 2490 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2491 else { 2492 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2494 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2495 jj = a->j; 2496 for (i=0; i<nz; i++) { 2497 collengths[jj[i]]++; 2498 } 2499 cia[0] = oshift; 2500 for (i=0; i<n; i++) { 2501 cia[i+1] = cia[i] + collengths[i]; 2502 } 2503 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2504 jj = a->j; 2505 for (row=0; row<m; row++) { 2506 mr = a->i[row+1] - a->i[row]; 2507 for (i=0; i<mr; i++) { 2508 col = *jj++; 2509 2510 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2511 } 2512 } 2513 ierr = PetscFree(collengths);CHKERRQ(ierr); 2514 *ia = cia; *ja = cja; 2515 } 2516 PetscFunctionReturn(0); 2517 } 2518 2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2520 { 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 if (!ia) PetscFunctionReturn(0); 2525 ierr = PetscFree(*ia);CHKERRQ(ierr); 2526 ierr = PetscFree(*ja);CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 /* 2531 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2532 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2533 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2534 */ 2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2536 { 2537 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2538 PetscErrorCode ierr; 2539 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2540 PetscInt nz = a->i[m],row,*jj,mr,col; 2541 PetscInt *cspidx; 2542 2543 PetscFunctionBegin; 2544 *nn = n; 2545 if (!ia) PetscFunctionReturn(0); 2546 2547 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2550 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2551 jj = a->j; 2552 for (i=0; i<nz; i++) { 2553 collengths[jj[i]]++; 2554 } 2555 cia[0] = oshift; 2556 for (i=0; i<n; i++) { 2557 cia[i+1] = cia[i] + collengths[i]; 2558 } 2559 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2560 jj = a->j; 2561 for (row=0; row<m; row++) { 2562 mr = a->i[row+1] - a->i[row]; 2563 for (i=0; i<mr; i++) { 2564 col = *jj++; 2565 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2566 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2567 } 2568 } 2569 ierr = PetscFree(collengths);CHKERRQ(ierr); 2570 *ia = cia; *ja = cja; 2571 *spidx = cspidx; 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2576 { 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2581 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2586 { 2587 PetscErrorCode ierr; 2588 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2589 2590 PetscFunctionBegin; 2591 if (!Y->preallocated || !aij->nz) { 2592 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2593 } 2594 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 /* -------------------------------------------------------------------*/ 2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2600 MatGetRow_SeqBAIJ, 2601 MatRestoreRow_SeqBAIJ, 2602 MatMult_SeqBAIJ_N, 2603 /* 4*/ MatMultAdd_SeqBAIJ_N, 2604 MatMultTranspose_SeqBAIJ, 2605 MatMultTransposeAdd_SeqBAIJ, 2606 0, 2607 0, 2608 0, 2609 /* 10*/ 0, 2610 MatLUFactor_SeqBAIJ, 2611 0, 2612 0, 2613 MatTranspose_SeqBAIJ, 2614 /* 15*/ MatGetInfo_SeqBAIJ, 2615 MatEqual_SeqBAIJ, 2616 MatGetDiagonal_SeqBAIJ, 2617 MatDiagonalScale_SeqBAIJ, 2618 MatNorm_SeqBAIJ, 2619 /* 20*/ 0, 2620 MatAssemblyEnd_SeqBAIJ, 2621 MatSetOption_SeqBAIJ, 2622 MatZeroEntries_SeqBAIJ, 2623 /* 24*/ MatZeroRows_SeqBAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /* 29*/ MatSetUp_SeqBAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /* 34*/ MatDuplicate_SeqBAIJ, 2634 0, 2635 0, 2636 MatILUFactor_SeqBAIJ, 2637 0, 2638 /* 39*/ MatAXPY_SeqBAIJ, 2639 MatCreateSubMatrices_SeqBAIJ, 2640 MatIncreaseOverlap_SeqBAIJ, 2641 MatGetValues_SeqBAIJ, 2642 MatCopy_SeqBAIJ, 2643 /* 44*/ 0, 2644 MatScale_SeqBAIJ, 2645 MatShift_SeqBAIJ, 2646 0, 2647 MatZeroRowsColumns_SeqBAIJ, 2648 /* 49*/ 0, 2649 MatGetRowIJ_SeqBAIJ, 2650 MatRestoreRowIJ_SeqBAIJ, 2651 MatGetColumnIJ_SeqBAIJ, 2652 MatRestoreColumnIJ_SeqBAIJ, 2653 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2654 0, 2655 0, 2656 0, 2657 MatSetValuesBlocked_SeqBAIJ, 2658 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2659 MatDestroy_SeqBAIJ, 2660 MatView_SeqBAIJ, 2661 0, 2662 0, 2663 /* 64*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2669 0, 2670 MatConvert_Basic, 2671 0, 2672 0, 2673 /* 74*/ 0, 2674 MatFDColoringApply_BAIJ, 2675 0, 2676 0, 2677 0, 2678 /* 79*/ 0, 2679 0, 2680 0, 2681 0, 2682 MatLoad_SeqBAIJ, 2683 /* 84*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /* 89*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /* 94*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /* 99*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /*104*/ 0, 2704 MatRealPart_SeqBAIJ, 2705 MatImaginaryPart_SeqBAIJ, 2706 0, 2707 0, 2708 /*109*/ 0, 2709 0, 2710 0, 2711 0, 2712 MatMissingDiagonal_SeqBAIJ, 2713 /*114*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/ 0, 2719 0, 2720 MatMultHermitianTranspose_SeqBAIJ, 2721 MatMultHermitianTransposeAdd_SeqBAIJ, 2722 0, 2723 /*124*/ 0, 2724 0, 2725 MatInvertBlockDiagonal_SeqBAIJ, 2726 0, 2727 0, 2728 /*129*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*134*/ 0, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /*139*/ MatSetBlockSizes_Default, 2739 0, 2740 0, 2741 MatFDColoringSetUp_SeqXAIJ, 2742 0, 2743 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2744 MatDestroySubMatrices_SeqBAIJ 2745 }; 2746 2747 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2748 { 2749 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2750 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2755 2756 /* allocate space for values if not already there */ 2757 if (!aij->saved_values) { 2758 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2759 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2760 } 2761 2762 /* copy values over */ 2763 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2768 { 2769 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2770 PetscErrorCode ierr; 2771 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2772 2773 PetscFunctionBegin; 2774 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2775 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2776 2777 /* copy values over */ 2778 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2784 2785 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2786 { 2787 Mat_SeqBAIJ *b; 2788 PetscErrorCode ierr; 2789 PetscInt i,mbs,nbs,bs2; 2790 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2791 2792 PetscFunctionBegin; 2793 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2794 if (nz == MAT_SKIP_ALLOCATION) { 2795 skipallocation = PETSC_TRUE; 2796 nz = 0; 2797 } 2798 2799 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2803 2804 B->preallocated = PETSC_TRUE; 2805 2806 mbs = B->rmap->n/bs; 2807 nbs = B->cmap->n/bs; 2808 bs2 = bs*bs; 2809 2810 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2811 2812 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2813 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2814 if (nnz) { 2815 for (i=0; i<mbs; i++) { 2816 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2817 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2818 } 2819 } 2820 2821 b = (Mat_SeqBAIJ*)B->data; 2822 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2823 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2824 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2825 2826 if (!flg) { 2827 switch (bs) { 2828 case 1: 2829 B->ops->mult = MatMult_SeqBAIJ_1; 2830 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2831 break; 2832 case 2: 2833 B->ops->mult = MatMult_SeqBAIJ_2; 2834 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2835 break; 2836 case 3: 2837 B->ops->mult = MatMult_SeqBAIJ_3; 2838 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2839 break; 2840 case 4: 2841 B->ops->mult = MatMult_SeqBAIJ_4; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2843 break; 2844 case 5: 2845 B->ops->mult = MatMult_SeqBAIJ_5; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2847 break; 2848 case 6: 2849 B->ops->mult = MatMult_SeqBAIJ_6; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2851 break; 2852 case 7: 2853 B->ops->mult = MatMult_SeqBAIJ_7; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2855 break; 2856 case 9: 2857 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2858 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2859 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2860 #else 2861 B->ops->mult = MatMult_SeqBAIJ_N; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2863 #endif 2864 break; 2865 case 11: 2866 B->ops->mult = MatMult_SeqBAIJ_11; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2868 break; 2869 case 15: 2870 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2872 break; 2873 default: 2874 B->ops->mult = MatMult_SeqBAIJ_N; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2876 break; 2877 } 2878 } 2879 B->ops->sor = MatSOR_SeqBAIJ; 2880 b->mbs = mbs; 2881 b->nbs = nbs; 2882 if (!skipallocation) { 2883 if (!b->imax) { 2884 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2885 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2886 2887 b->free_imax_ilen = PETSC_TRUE; 2888 } 2889 /* b->ilen will count nonzeros in each block row so far. */ 2890 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2891 if (!nnz) { 2892 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2893 else if (nz < 0) nz = 1; 2894 nz = PetscMin(nz,nbs); 2895 for (i=0; i<mbs; i++) b->imax[i] = nz; 2896 nz = nz*mbs; 2897 } else { 2898 nz = 0; 2899 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2900 } 2901 2902 /* allocate the matrix space */ 2903 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2904 if (B->structure_only) { 2905 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2906 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2907 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2908 } else { 2909 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2910 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2911 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2912 } 2913 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2914 2915 if (B->structure_only) { 2916 b->singlemalloc = PETSC_FALSE; 2917 b->free_a = PETSC_FALSE; 2918 } else { 2919 b->singlemalloc = PETSC_TRUE; 2920 b->free_a = PETSC_TRUE; 2921 } 2922 b->free_ij = PETSC_TRUE; 2923 2924 b->i[0] = 0; 2925 for (i=1; i<mbs+1; i++) { 2926 b->i[i] = b->i[i-1] + b->imax[i-1]; 2927 } 2928 2929 } else { 2930 b->free_a = PETSC_FALSE; 2931 b->free_ij = PETSC_FALSE; 2932 } 2933 2934 b->bs2 = bs2; 2935 b->mbs = mbs; 2936 b->nz = 0; 2937 b->maxnz = nz; 2938 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2939 B->was_assembled = PETSC_FALSE; 2940 B->assembled = PETSC_FALSE; 2941 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2942 PetscFunctionReturn(0); 2943 } 2944 2945 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2946 { 2947 PetscInt i,m,nz,nz_max=0,*nnz; 2948 PetscScalar *values=0; 2949 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2954 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2959 m = B->rmap->n/bs; 2960 2961 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2962 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2963 for (i=0; i<m; i++) { 2964 nz = ii[i+1]- ii[i]; 2965 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2966 nz_max = PetscMax(nz_max, nz); 2967 nnz[i] = nz; 2968 } 2969 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2970 ierr = PetscFree(nnz);CHKERRQ(ierr); 2971 2972 values = (PetscScalar*)V; 2973 if (!values) { 2974 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2975 } 2976 for (i=0; i<m; i++) { 2977 PetscInt ncols = ii[i+1] - ii[i]; 2978 const PetscInt *icols = jj + ii[i]; 2979 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2980 if (!roworiented) { 2981 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2982 } else { 2983 PetscInt j; 2984 for (j=0; j<ncols; j++) { 2985 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2986 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2987 } 2988 } 2989 } 2990 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2991 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2992 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 /*MC 2998 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2999 block sparse compressed row format. 3000 3001 Options Database Keys: 3002 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3003 3004 Level: beginner 3005 3006 .seealso: MatCreateSeqBAIJ() 3007 M*/ 3008 3009 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3010 3011 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3012 { 3013 PetscErrorCode ierr; 3014 PetscMPIInt size; 3015 Mat_SeqBAIJ *b; 3016 3017 PetscFunctionBegin; 3018 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3019 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3020 3021 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3022 B->data = (void*)b; 3023 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3024 3025 b->row = 0; 3026 b->col = 0; 3027 b->icol = 0; 3028 b->reallocs = 0; 3029 b->saved_values = 0; 3030 3031 b->roworiented = PETSC_TRUE; 3032 b->nonew = 0; 3033 b->diag = 0; 3034 B->spptr = 0; 3035 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3036 b->keepnonzeropattern = PETSC_FALSE; 3037 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3047 #if defined(PETSC_HAVE_HYPRE) 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3049 #endif 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3053 PetscFunctionReturn(0); 3054 } 3055 3056 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3057 { 3058 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3059 PetscErrorCode ierr; 3060 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3061 3062 PetscFunctionBegin; 3063 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3064 3065 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3066 c->imax = a->imax; 3067 c->ilen = a->ilen; 3068 c->free_imax_ilen = PETSC_FALSE; 3069 } else { 3070 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3071 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3072 for (i=0; i<mbs; i++) { 3073 c->imax[i] = a->imax[i]; 3074 c->ilen[i] = a->ilen[i]; 3075 } 3076 c->free_imax_ilen = PETSC_TRUE; 3077 } 3078 3079 /* allocate the matrix space */ 3080 if (mallocmatspace) { 3081 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3082 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3083 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3084 3085 c->i = a->i; 3086 c->j = a->j; 3087 c->singlemalloc = PETSC_FALSE; 3088 c->free_a = PETSC_TRUE; 3089 c->free_ij = PETSC_FALSE; 3090 c->parent = A; 3091 C->preallocated = PETSC_TRUE; 3092 C->assembled = PETSC_TRUE; 3093 3094 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3095 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3096 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3097 } else { 3098 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3099 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3100 3101 c->singlemalloc = PETSC_TRUE; 3102 c->free_a = PETSC_TRUE; 3103 c->free_ij = PETSC_TRUE; 3104 3105 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3106 if (mbs > 0) { 3107 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3108 if (cpvalues == MAT_COPY_VALUES) { 3109 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3110 } else { 3111 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3112 } 3113 } 3114 C->preallocated = PETSC_TRUE; 3115 C->assembled = PETSC_TRUE; 3116 } 3117 } 3118 3119 c->roworiented = a->roworiented; 3120 c->nonew = a->nonew; 3121 3122 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3123 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3124 3125 c->bs2 = a->bs2; 3126 c->mbs = a->mbs; 3127 c->nbs = a->nbs; 3128 3129 if (a->diag) { 3130 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3131 c->diag = a->diag; 3132 c->free_diag = PETSC_FALSE; 3133 } else { 3134 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3135 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3136 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3137 c->free_diag = PETSC_TRUE; 3138 } 3139 } else c->diag = 0; 3140 3141 c->nz = a->nz; 3142 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3143 c->solve_work = NULL; 3144 c->mult_work = NULL; 3145 c->sor_workt = NULL; 3146 c->sor_work = NULL; 3147 3148 c->compressedrow.use = a->compressedrow.use; 3149 c->compressedrow.nrows = a->compressedrow.nrows; 3150 if (a->compressedrow.use) { 3151 i = a->compressedrow.nrows; 3152 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3153 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3154 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3155 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3156 } else { 3157 c->compressedrow.use = PETSC_FALSE; 3158 c->compressedrow.i = NULL; 3159 c->compressedrow.rindex = NULL; 3160 } 3161 C->nonzerostate = A->nonzerostate; 3162 3163 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3164 PetscFunctionReturn(0); 3165 } 3166 3167 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3168 { 3169 PetscErrorCode ierr; 3170 3171 PetscFunctionBegin; 3172 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3173 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3174 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3175 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 3179 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3180 { 3181 Mat_SeqBAIJ *a; 3182 PetscErrorCode ierr; 3183 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3184 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3185 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3186 PetscInt *masked,nmask,tmp,bs2,ishift; 3187 PetscMPIInt size; 3188 int fd; 3189 PetscScalar *aa; 3190 MPI_Comm comm; 3191 PetscBool isbinary; 3192 3193 PetscFunctionBegin; 3194 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3195 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3196 3197 /* force binary viewer to load .info file if it has not yet done so */ 3198 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3199 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3200 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3201 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3203 if (bs < 0) bs = 1; 3204 bs2 = bs*bs; 3205 3206 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3207 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3208 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3209 ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3210 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3211 M = header[1]; N = header[2]; nz = header[3]; 3212 3213 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3214 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3215 3216 /* 3217 This code adds extra rows to make sure the number of rows is 3218 divisible by the blocksize 3219 */ 3220 mbs = M/bs; 3221 extra_rows = bs - M + bs*(mbs); 3222 if (extra_rows == bs) extra_rows = 0; 3223 else mbs++; 3224 if (extra_rows) { 3225 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3226 } 3227 3228 /* Set global sizes if not already set */ 3229 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3230 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3231 } else { /* Check if the matrix global sizes are correct */ 3232 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3233 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3234 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3235 } 3236 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3237 } 3238 3239 /* read in row lengths */ 3240 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3241 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 3242 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3243 3244 /* read in column indices */ 3245 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3246 ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3247 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3248 3249 /* loop over row lengths determining block row lengths */ 3250 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3251 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3252 ierr = PetscArrayzero(mask,mbs);CHKERRQ(ierr); 3253 rowcount = 0; 3254 nzcount = 0; 3255 for (i=0; i<mbs; i++) { 3256 nmask = 0; 3257 for (j=0; j<bs; j++) { 3258 kmax = rowlengths[rowcount]; 3259 for (k=0; k<kmax; k++) { 3260 tmp = jj[nzcount++]/bs; 3261 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3262 } 3263 rowcount++; 3264 } 3265 browlengths[i] += nmask; 3266 /* zero out the mask elements we set */ 3267 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3268 } 3269 3270 /* Do preallocation */ 3271 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3272 a = (Mat_SeqBAIJ*)newmat->data; 3273 3274 /* set matrix "i" values */ 3275 a->i[0] = 0; 3276 for (i=1; i<= mbs; i++) { 3277 a->i[i] = a->i[i-1] + browlengths[i-1]; 3278 a->ilen[i-1] = browlengths[i-1]; 3279 } 3280 a->nz = 0; 3281 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3282 3283 /* read in nonzero values */ 3284 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3285 ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3286 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3287 3288 /* set "a" and "j" values into matrix */ 3289 nzcount = 0; jcount = 0; 3290 for (i=0; i<mbs; i++) { 3291 nzcountb = nzcount; 3292 nmask = 0; 3293 for (j=0; j<bs; j++) { 3294 kmax = rowlengths[i*bs+j]; 3295 for (k=0; k<kmax; k++) { 3296 tmp = jj[nzcount++]/bs; 3297 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3298 } 3299 } 3300 /* sort the masked values */ 3301 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3302 3303 /* set "j" values into matrix */ 3304 maskcount = 1; 3305 for (j=0; j<nmask; j++) { 3306 a->j[jcount++] = masked[j]; 3307 mask[masked[j]] = maskcount++; 3308 } 3309 /* set "a" values into matrix */ 3310 ishift = bs2*a->i[i]; 3311 for (j=0; j<bs; j++) { 3312 kmax = rowlengths[i*bs+j]; 3313 for (k=0; k<kmax; k++) { 3314 tmp = jj[nzcountb]/bs; 3315 block = mask[tmp] - 1; 3316 point = jj[nzcountb] - bs*tmp; 3317 idx = ishift + bs2*block + j + bs*point; 3318 a->a[idx] = (MatScalar)aa[nzcountb++]; 3319 } 3320 } 3321 /* zero out the mask elements we set */ 3322 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3323 } 3324 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3325 3326 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3327 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3328 ierr = PetscFree(aa);CHKERRQ(ierr); 3329 ierr = PetscFree(jj);CHKERRQ(ierr); 3330 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3331 3332 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3333 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3334 PetscFunctionReturn(0); 3335 } 3336 3337 /*@C 3338 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3339 compressed row) format. For good matrix assembly performance the 3340 user should preallocate the matrix storage by setting the parameter nz 3341 (or the array nnz). By setting these parameters accurately, performance 3342 during matrix assembly can be increased by more than a factor of 50. 3343 3344 Collective 3345 3346 Input Parameters: 3347 + comm - MPI communicator, set to PETSC_COMM_SELF 3348 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3349 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3350 . m - number of rows 3351 . n - number of columns 3352 . nz - number of nonzero blocks per block row (same for all rows) 3353 - nnz - array containing the number of nonzero blocks in the various block rows 3354 (possibly different for each block row) or NULL 3355 3356 Output Parameter: 3357 . A - the matrix 3358 3359 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3360 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3361 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3362 3363 Options Database Keys: 3364 . -mat_no_unroll - uses code that does not unroll the loops in the 3365 block calculations (much slower) 3366 . -mat_block_size - size of the blocks to use 3367 3368 Level: intermediate 3369 3370 Notes: 3371 The number of rows and columns must be divisible by blocksize. 3372 3373 If the nnz parameter is given then the nz parameter is ignored 3374 3375 A nonzero block is any block that as 1 or more nonzeros in it 3376 3377 The block AIJ format is fully compatible with standard Fortran 77 3378 storage. That is, the stored row and column indices can begin at 3379 either one (as in Fortran) or zero. See the users' manual for details. 3380 3381 Specify the preallocated storage with either nz or nnz (not both). 3382 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3383 allocation. See Users-Manual: ch_mat for details. 3384 matrices. 3385 3386 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3387 @*/ 3388 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3389 { 3390 PetscErrorCode ierr; 3391 3392 PetscFunctionBegin; 3393 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3394 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3395 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3396 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3397 PetscFunctionReturn(0); 3398 } 3399 3400 /*@C 3401 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3402 per row in the matrix. For good matrix assembly performance the 3403 user should preallocate the matrix storage by setting the parameter nz 3404 (or the array nnz). By setting these parameters accurately, performance 3405 during matrix assembly can be increased by more than a factor of 50. 3406 3407 Collective 3408 3409 Input Parameters: 3410 + B - the matrix 3411 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3412 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3413 . nz - number of block nonzeros per block row (same for all rows) 3414 - nnz - array containing the number of block nonzeros in the various block rows 3415 (possibly different for each block row) or NULL 3416 3417 Options Database Keys: 3418 . -mat_no_unroll - uses code that does not unroll the loops in the 3419 block calculations (much slower) 3420 . -mat_block_size - size of the blocks to use 3421 3422 Level: intermediate 3423 3424 Notes: 3425 If the nnz parameter is given then the nz parameter is ignored 3426 3427 You can call MatGetInfo() to get information on how effective the preallocation was; 3428 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3429 You can also run with the option -info and look for messages with the string 3430 malloc in them to see if additional memory allocation was needed. 3431 3432 The block AIJ format is fully compatible with standard Fortran 77 3433 storage. That is, the stored row and column indices can begin at 3434 either one (as in Fortran) or zero. See the users' manual for details. 3435 3436 Specify the preallocated storage with either nz or nnz (not both). 3437 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3438 allocation. See Users-Manual: ch_mat for details. 3439 3440 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3441 @*/ 3442 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3443 { 3444 PetscErrorCode ierr; 3445 3446 PetscFunctionBegin; 3447 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3448 PetscValidType(B,1); 3449 PetscValidLogicalCollectiveInt(B,bs,2); 3450 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3451 PetscFunctionReturn(0); 3452 } 3453 3454 /*@C 3455 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3456 (the default sequential PETSc format). 3457 3458 Collective 3459 3460 Input Parameters: 3461 + B - the matrix 3462 . i - the indices into j for the start of each local row (starts with zero) 3463 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3464 - v - optional values in the matrix 3465 3466 Level: developer 3467 3468 Notes: 3469 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3470 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3471 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3472 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3473 block column and the second index is over columns within a block. 3474 3475 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3476 @*/ 3477 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3478 { 3479 PetscErrorCode ierr; 3480 3481 PetscFunctionBegin; 3482 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3483 PetscValidType(B,1); 3484 PetscValidLogicalCollectiveInt(B,bs,2); 3485 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3486 PetscFunctionReturn(0); 3487 } 3488 3489 3490 /*@ 3491 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3492 3493 Collective 3494 3495 Input Parameters: 3496 + comm - must be an MPI communicator of size 1 3497 . bs - size of block 3498 . m - number of rows 3499 . n - number of columns 3500 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3501 . j - column indices 3502 - a - matrix values 3503 3504 Output Parameter: 3505 . mat - the matrix 3506 3507 Level: advanced 3508 3509 Notes: 3510 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3511 once the matrix is destroyed 3512 3513 You cannot set new nonzero locations into this matrix, that will generate an error. 3514 3515 The i and j indices are 0 based 3516 3517 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3518 3519 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3520 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3521 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3522 with column-major ordering within blocks. 3523 3524 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3525 3526 @*/ 3527 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3528 { 3529 PetscErrorCode ierr; 3530 PetscInt ii; 3531 Mat_SeqBAIJ *baij; 3532 3533 PetscFunctionBegin; 3534 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3535 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3536 3537 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3538 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3539 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3540 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3541 baij = (Mat_SeqBAIJ*)(*mat)->data; 3542 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3543 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3544 3545 baij->i = i; 3546 baij->j = j; 3547 baij->a = a; 3548 3549 baij->singlemalloc = PETSC_FALSE; 3550 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3551 baij->free_a = PETSC_FALSE; 3552 baij->free_ij = PETSC_FALSE; 3553 3554 for (ii=0; ii<m; ii++) { 3555 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3556 #if defined(PETSC_USE_DEBUG) 3557 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3558 #endif 3559 } 3560 #if defined(PETSC_USE_DEBUG) 3561 for (ii=0; ii<baij->i[m]; ii++) { 3562 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3563 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3564 } 3565 #endif 3566 3567 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3568 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3569 PetscFunctionReturn(0); 3570 } 3571 3572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3573 { 3574 PetscErrorCode ierr; 3575 PetscMPIInt size; 3576 3577 PetscFunctionBegin; 3578 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3579 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3580 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3581 } else { 3582 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3583 } 3584 PetscFunctionReturn(0); 3585 } 3586