1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16); 952 } 953 if (N >= i) { 954 PetscArrayzero(ap+16*i,16); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; brow = row/4; 990 rp = aj + ai[brow]; 991 ap = aa + 16*ai[brow]; 992 nrow = ailen[brow]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; bcol = col/4; 997 ridx = row % 4; cidx = col % 4; 998 value = v[l + k*n]; 999 if (col <= lastcol) low = 0; 1000 else high = nrow; 1001 lastcol = col; 1002 while (high-low > 7) { 1003 t = (low+high)/2; 1004 if (rp[t] > bcol) high = t; 1005 else low = t; 1006 } 1007 for (i=low; i<high; i++) { 1008 if (rp[i] > bcol) break; 1009 if (rp[i] == bcol) { 1010 bap = ap + 16*i + 4*cidx + ridx; 1011 *bap += value; 1012 goto noinsert1; 1013 } 1014 } 1015 N = nrow++ - 1; 1016 high++; /* added new column thus must search to one higher than before */ 1017 /* shift up all the later entries in this row */ 1018 PetscArraymove(rp+i+1,rp+i,N-i+1); 1019 PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1)); 1020 PetscArrayzero(ap+16*i,16); 1021 rp[i] = bcol; 1022 ap[16*i + 4*cidx + ridx] = value; 1023 noinsert1:; 1024 low = i; 1025 } 1026 ailen[brow] = nrow; 1027 } 1028 PetscFunctionReturnVoid(); 1029 } 1030 1031 /* 1032 Checks for missing diagonals 1033 */ 1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1035 { 1036 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1037 PetscErrorCode ierr; 1038 PetscInt *diag,*ii = a->i,i; 1039 1040 PetscFunctionBegin; 1041 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1042 *missing = PETSC_FALSE; 1043 if (A->rmap->n > 0 && !ii) { 1044 *missing = PETSC_TRUE; 1045 if (d) *d = 0; 1046 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1047 } else { 1048 diag = a->diag; 1049 for (i=0; i<a->mbs; i++) { 1050 if (diag[i] >= ii[i+1]) { 1051 *missing = PETSC_TRUE; 1052 if (d) *d = i; 1053 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1054 break; 1055 } 1056 } 1057 } 1058 PetscFunctionReturn(0); 1059 } 1060 1061 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1062 { 1063 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1064 PetscErrorCode ierr; 1065 PetscInt i,j,m = a->mbs; 1066 1067 PetscFunctionBegin; 1068 if (!a->diag) { 1069 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1070 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1071 a->free_diag = PETSC_TRUE; 1072 } 1073 for (i=0; i<m; i++) { 1074 a->diag[i] = a->i[i+1]; 1075 for (j=a->i[i]; j<a->i[i+1]; j++) { 1076 if (a->j[j] == i) { 1077 a->diag[i] = j; 1078 break; 1079 } 1080 } 1081 } 1082 PetscFunctionReturn(0); 1083 } 1084 1085 1086 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1087 { 1088 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1089 PetscErrorCode ierr; 1090 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1091 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1092 1093 PetscFunctionBegin; 1094 *nn = n; 1095 if (!ia) PetscFunctionReturn(0); 1096 if (symmetric) { 1097 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1098 nz = tia[n]; 1099 } else { 1100 tia = a->i; tja = a->j; 1101 } 1102 1103 if (!blockcompressed && bs > 1) { 1104 (*nn) *= bs; 1105 /* malloc & create the natural set of indices */ 1106 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1107 if (n) { 1108 (*ia)[0] = oshift; 1109 for (j=1; j<bs; j++) { 1110 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1111 } 1112 } 1113 1114 for (i=1; i<n; i++) { 1115 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1116 for (j=1; j<bs; j++) { 1117 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1118 } 1119 } 1120 if (n) { 1121 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1122 } 1123 1124 if (inja) { 1125 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1126 cnt = 0; 1127 for (i=0; i<n; i++) { 1128 for (j=0; j<bs; j++) { 1129 for (k=tia[i]; k<tia[i+1]; k++) { 1130 for (l=0; l<bs; l++) { 1131 (*ja)[cnt++] = bs*tja[k] + l; 1132 } 1133 } 1134 } 1135 } 1136 } 1137 1138 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1139 ierr = PetscFree(tia);CHKERRQ(ierr); 1140 ierr = PetscFree(tja);CHKERRQ(ierr); 1141 } 1142 } else if (oshift == 1) { 1143 if (symmetric) { 1144 nz = tia[A->rmap->n/bs]; 1145 /* add 1 to i and j indices */ 1146 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1147 *ia = tia; 1148 if (ja) { 1149 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1150 *ja = tja; 1151 } 1152 } else { 1153 nz = a->i[A->rmap->n/bs]; 1154 /* malloc space and add 1 to i and j indices */ 1155 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1156 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1157 if (ja) { 1158 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1159 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1160 } 1161 } 1162 } else { 1163 *ia = tia; 1164 if (ja) *ja = tja; 1165 } 1166 PetscFunctionReturn(0); 1167 } 1168 1169 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1170 { 1171 PetscErrorCode ierr; 1172 1173 PetscFunctionBegin; 1174 if (!ia) PetscFunctionReturn(0); 1175 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1176 ierr = PetscFree(*ia);CHKERRQ(ierr); 1177 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1178 } 1179 PetscFunctionReturn(0); 1180 } 1181 1182 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1183 { 1184 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1185 PetscErrorCode ierr; 1186 1187 PetscFunctionBegin; 1188 #if defined(PETSC_USE_LOG) 1189 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1190 #endif 1191 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1192 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1193 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1194 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1195 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1196 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1197 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1198 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1199 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1200 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1201 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1202 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1203 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1204 1205 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1206 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1207 ierr = PetscFree(A->data);CHKERRQ(ierr); 1208 1209 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1210 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1211 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1212 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1213 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1220 #if defined(PETSC_HAVE_HYPRE) 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1222 #endif 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1229 { 1230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1231 PetscErrorCode ierr; 1232 1233 PetscFunctionBegin; 1234 switch (op) { 1235 case MAT_ROW_ORIENTED: 1236 a->roworiented = flg; 1237 break; 1238 case MAT_KEEP_NONZERO_PATTERN: 1239 a->keepnonzeropattern = flg; 1240 break; 1241 case MAT_NEW_NONZERO_LOCATIONS: 1242 a->nonew = (flg ? 0 : 1); 1243 break; 1244 case MAT_NEW_NONZERO_LOCATION_ERR: 1245 a->nonew = (flg ? -1 : 0); 1246 break; 1247 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1248 a->nonew = (flg ? -2 : 0); 1249 break; 1250 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1251 a->nounused = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_DIAGONALS: 1254 case MAT_IGNORE_OFF_PROC_ENTRIES: 1255 case MAT_USE_HASH_TABLE: 1256 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1257 break; 1258 case MAT_SPD: 1259 case MAT_SYMMETRIC: 1260 case MAT_STRUCTURALLY_SYMMETRIC: 1261 case MAT_HERMITIAN: 1262 case MAT_SYMMETRY_ETERNAL: 1263 case MAT_SUBMAT_SINGLEIS: 1264 case MAT_STRUCTURE_ONLY: 1265 /* These options are handled directly by MatSetOption() */ 1266 break; 1267 default: 1268 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1269 } 1270 PetscFunctionReturn(0); 1271 } 1272 1273 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1274 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1275 { 1276 PetscErrorCode ierr; 1277 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1278 MatScalar *aa_i; 1279 PetscScalar *v_i; 1280 1281 PetscFunctionBegin; 1282 bs = A->rmap->bs; 1283 bs2 = bs*bs; 1284 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1285 1286 bn = row/bs; /* Block number */ 1287 bp = row % bs; /* Block Position */ 1288 M = ai[bn+1] - ai[bn]; 1289 *nz = bs*M; 1290 1291 if (v) { 1292 *v = 0; 1293 if (*nz) { 1294 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1295 for (i=0; i<M; i++) { /* for each block in the block row */ 1296 v_i = *v + i*bs; 1297 aa_i = aa + bs2*(ai[bn] + i); 1298 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1299 } 1300 } 1301 } 1302 1303 if (idx) { 1304 *idx = 0; 1305 if (*nz) { 1306 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1307 for (i=0; i<M; i++) { /* for each block in the block row */ 1308 idx_i = *idx + i*bs; 1309 itmp = bs*aj[ai[bn] + i]; 1310 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1311 } 1312 } 1313 } 1314 PetscFunctionReturn(0); 1315 } 1316 1317 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1318 { 1319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1328 { 1329 PetscErrorCode ierr; 1330 1331 PetscFunctionBegin; 1332 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1333 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1334 PetscFunctionReturn(0); 1335 } 1336 1337 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1338 { 1339 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1340 Mat C; 1341 PetscErrorCode ierr; 1342 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1343 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1344 MatScalar *ata,*aa=a->a; 1345 1346 PetscFunctionBegin; 1347 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1348 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1349 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1350 1351 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1352 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1353 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1354 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1355 1356 at = (Mat_SeqBAIJ*)C->data; 1357 ati = at->i; 1358 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1359 } else { 1360 C = *B; 1361 at = (Mat_SeqBAIJ*)C->data; 1362 ati = at->i; 1363 } 1364 1365 atj = at->j; 1366 ata = at->a; 1367 1368 /* Copy ati into atfill so we have locations of the next free space in atj */ 1369 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1370 1371 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1372 for (i=0; i<mbs; i++) { 1373 anzj = ai[i+1] - ai[i]; 1374 for (j=0; j<anzj; j++) { 1375 atj[atfill[*aj]] = i; 1376 for (kr=0; kr<bs; kr++) { 1377 for (k=0; k<bs; k++) { 1378 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1379 } 1380 } 1381 atfill[*aj++] += 1; 1382 } 1383 } 1384 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1385 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1386 1387 /* Clean up temporary space and complete requests. */ 1388 ierr = PetscFree(atfill);CHKERRQ(ierr); 1389 1390 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1391 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1392 *B = C; 1393 } else { 1394 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1395 } 1396 PetscFunctionReturn(0); 1397 } 1398 1399 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1400 { 1401 PetscErrorCode ierr; 1402 Mat Btrans; 1403 1404 PetscFunctionBegin; 1405 *f = PETSC_FALSE; 1406 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1407 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1408 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 1412 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1413 { 1414 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1415 PetscErrorCode ierr; 1416 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1417 int fd; 1418 PetscScalar *aa; 1419 FILE *file; 1420 1421 PetscFunctionBegin; 1422 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1423 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1424 col_lens[0] = MAT_FILE_CLASSID; 1425 1426 col_lens[1] = A->rmap->N; 1427 col_lens[2] = A->cmap->n; 1428 col_lens[3] = a->nz*bs2; 1429 1430 /* store lengths of each row and write (including header) to file */ 1431 count = 0; 1432 for (i=0; i<a->mbs; i++) { 1433 for (j=0; j<bs; j++) { 1434 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1435 } 1436 } 1437 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1438 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1439 1440 /* store column indices (zero start index) */ 1441 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1442 count = 0; 1443 for (i=0; i<a->mbs; i++) { 1444 for (j=0; j<bs; j++) { 1445 for (k=a->i[i]; k<a->i[i+1]; k++) { 1446 for (l=0; l<bs; l++) { 1447 jj[count++] = bs*a->j[k] + l; 1448 } 1449 } 1450 } 1451 } 1452 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1453 ierr = PetscFree(jj);CHKERRQ(ierr); 1454 1455 /* store nonzero values */ 1456 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1457 count = 0; 1458 for (i=0; i<a->mbs; i++) { 1459 for (j=0; j<bs; j++) { 1460 for (k=a->i[i]; k<a->i[i+1]; k++) { 1461 for (l=0; l<bs; l++) { 1462 aa[count++] = a->a[bs2*k + l*bs + j]; 1463 } 1464 } 1465 } 1466 } 1467 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1468 ierr = PetscFree(aa);CHKERRQ(ierr); 1469 1470 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1471 if (file) { 1472 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1473 } 1474 PetscFunctionReturn(0); 1475 } 1476 1477 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1478 { 1479 PetscErrorCode ierr; 1480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1481 PetscInt i,bs = A->rmap->bs,k; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1485 for (i=0; i<a->mbs; i++) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1487 for (k=a->i[i]; k<a->i[i+1]; k++) { 1488 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1489 } 1490 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1491 } 1492 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1497 { 1498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1501 PetscViewerFormat format; 1502 1503 PetscFunctionBegin; 1504 if (A->structure_only) { 1505 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1510 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1511 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1512 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1513 const char *matname; 1514 Mat aij; 1515 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1516 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1517 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1518 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1519 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1520 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1521 PetscFunctionReturn(0); 1522 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1523 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1524 for (i=0; i<a->mbs; i++) { 1525 for (j=0; j<bs; j++) { 1526 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1527 for (k=a->i[i]; k<a->i[i+1]; k++) { 1528 for (l=0; l<bs; l++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1532 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1533 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1535 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } 1539 #else 1540 if (a->a[bs2*k + l*bs + j] != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1542 } 1543 #endif 1544 } 1545 } 1546 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1547 } 1548 } 1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1550 } else { 1551 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1552 for (i=0; i<a->mbs; i++) { 1553 for (j=0; j<bs; j++) { 1554 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1555 for (k=a->i[i]; k<a->i[i+1]; k++) { 1556 for (l=0; l<bs; l++) { 1557 #if defined(PETSC_USE_COMPLEX) 1558 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1559 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1560 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1561 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1563 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } else { 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } 1567 #else 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1569 #endif 1570 } 1571 } 1572 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1573 } 1574 } 1575 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #include <petscdraw.h> 1582 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1583 { 1584 Mat A = (Mat) Aa; 1585 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1586 PetscErrorCode ierr; 1587 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1588 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1589 MatScalar *aa; 1590 PetscViewer viewer; 1591 PetscViewerFormat format; 1592 1593 PetscFunctionBegin; 1594 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1596 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1597 1598 /* loop over matrix elements drawing boxes */ 1599 1600 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1601 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1602 /* Blue for negative, Cyan for zero and Red for positive */ 1603 color = PETSC_DRAW_BLUE; 1604 for (i=0,row=0; i<mbs; i++,row+=bs) { 1605 for (j=a->i[i]; j<a->i[i+1]; j++) { 1606 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1607 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1608 aa = a->a + j*bs2; 1609 for (k=0; k<bs; k++) { 1610 for (l=0; l<bs; l++) { 1611 if (PetscRealPart(*aa++) >= 0.) continue; 1612 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1613 } 1614 } 1615 } 1616 } 1617 color = PETSC_DRAW_CYAN; 1618 for (i=0,row=0; i<mbs; i++,row+=bs) { 1619 for (j=a->i[i]; j<a->i[i+1]; j++) { 1620 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1621 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1622 aa = a->a + j*bs2; 1623 for (k=0; k<bs; k++) { 1624 for (l=0; l<bs; l++) { 1625 if (PetscRealPart(*aa++) != 0.) continue; 1626 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1627 } 1628 } 1629 } 1630 } 1631 color = PETSC_DRAW_RED; 1632 for (i=0,row=0; i<mbs; i++,row+=bs) { 1633 for (j=a->i[i]; j<a->i[i+1]; j++) { 1634 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1635 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1636 aa = a->a + j*bs2; 1637 for (k=0; k<bs; k++) { 1638 for (l=0; l<bs; l++) { 1639 if (PetscRealPart(*aa++) <= 0.) continue; 1640 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 } 1645 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1646 } else { 1647 /* use contour shading to indicate magnitude of values */ 1648 /* first determine max of all nonzero values */ 1649 PetscReal minv = 0.0, maxv = 0.0; 1650 PetscDraw popup; 1651 1652 for (i=0; i<a->nz*a->bs2; i++) { 1653 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1654 } 1655 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1656 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1657 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1658 1659 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1660 for (i=0,row=0; i<mbs; i++,row+=bs) { 1661 for (j=a->i[i]; j<a->i[i+1]; j++) { 1662 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1663 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1664 aa = a->a + j*bs2; 1665 for (k=0; k<bs; k++) { 1666 for (l=0; l<bs; l++) { 1667 MatScalar v = *aa++; 1668 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1669 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 } 1674 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1680 { 1681 PetscErrorCode ierr; 1682 PetscReal xl,yl,xr,yr,w,h; 1683 PetscDraw draw; 1684 PetscBool isnull; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1688 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1689 if (isnull) PetscFunctionReturn(0); 1690 1691 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1692 xr += w; yr += h; xl = -w; yl = -h; 1693 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1694 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1695 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1697 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1702 { 1703 PetscErrorCode ierr; 1704 PetscBool iascii,isbinary,isdraw; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1708 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1709 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1710 if (iascii) { 1711 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1712 } else if (isbinary) { 1713 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1714 } else if (isdraw) { 1715 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1716 } else { 1717 Mat B; 1718 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1719 ierr = MatView(B,viewer);CHKERRQ(ierr); 1720 ierr = MatDestroy(&B);CHKERRQ(ierr); 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 1726 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1727 { 1728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1729 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1730 PetscInt *ai = a->i,*ailen = a->ilen; 1731 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1732 MatScalar *ap,*aa = a->a; 1733 1734 PetscFunctionBegin; 1735 for (k=0; k<m; k++) { /* loop over rows */ 1736 row = im[k]; brow = row/bs; 1737 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1738 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1739 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1740 nrow = ailen[brow]; 1741 for (l=0; l<n; l++) { /* loop over columns */ 1742 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1743 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1744 col = in[l]; 1745 bcol = col/bs; 1746 cidx = col%bs; 1747 ridx = row%bs; 1748 high = nrow; 1749 low = 0; /* assume unsorted */ 1750 while (high-low > 5) { 1751 t = (low+high)/2; 1752 if (rp[t] > bcol) high = t; 1753 else low = t; 1754 } 1755 for (i=low; i<high; i++) { 1756 if (rp[i] > bcol) break; 1757 if (rp[i] == bcol) { 1758 *v++ = ap[bs2*i+bs*cidx+ridx]; 1759 goto finished; 1760 } 1761 } 1762 *v++ = 0.0; 1763 finished:; 1764 } 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1770 { 1771 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1772 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1773 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1774 PetscErrorCode ierr; 1775 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1776 PetscBool roworiented=a->roworiented; 1777 const PetscScalar *value = v; 1778 MatScalar *ap=NULL,*aa = a->a,*bap; 1779 1780 PetscFunctionBegin; 1781 if (roworiented) { 1782 stepval = (n-1)*bs; 1783 } else { 1784 stepval = (m-1)*bs; 1785 } 1786 for (k=0; k<m; k++) { /* loop over added rows */ 1787 row = im[k]; 1788 if (row < 0) continue; 1789 #if defined(PETSC_USE_DEBUG) 1790 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1791 #endif 1792 rp = aj + ai[row]; 1793 if (!A->structure_only) ap = aa + bs2*ai[row]; 1794 rmax = imax[row]; 1795 nrow = ailen[row]; 1796 low = 0; 1797 high = nrow; 1798 for (l=0; l<n; l++) { /* loop over added columns */ 1799 if (in[l] < 0) continue; 1800 #if defined(PETSC_USE_DEBUG) 1801 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1802 #endif 1803 col = in[l]; 1804 if (!A->structure_only) { 1805 if (roworiented) { 1806 value = v + (k*(stepval+bs) + l)*bs; 1807 } else { 1808 value = v + (l*(stepval+bs) + k)*bs; 1809 } 1810 } 1811 if (col <= lastcol) low = 0; 1812 else high = nrow; 1813 lastcol = col; 1814 while (high-low > 7) { 1815 t = (low+high)/2; 1816 if (rp[t] > col) high = t; 1817 else low = t; 1818 } 1819 for (i=low; i<high; i++) { 1820 if (rp[i] > col) break; 1821 if (rp[i] == col) { 1822 if (A->structure_only) goto noinsert2; 1823 bap = ap + bs2*i; 1824 if (roworiented) { 1825 if (is == ADD_VALUES) { 1826 for (ii=0; ii<bs; ii++,value+=stepval) { 1827 for (jj=ii; jj<bs2; jj+=bs) { 1828 bap[jj] += *value++; 1829 } 1830 } 1831 } else { 1832 for (ii=0; ii<bs; ii++,value+=stepval) { 1833 for (jj=ii; jj<bs2; jj+=bs) { 1834 bap[jj] = *value++; 1835 } 1836 } 1837 } 1838 } else { 1839 if (is == ADD_VALUES) { 1840 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1841 for (jj=0; jj<bs; jj++) { 1842 bap[jj] += value[jj]; 1843 } 1844 bap += bs; 1845 } 1846 } else { 1847 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 bap[jj] = value[jj]; 1850 } 1851 bap += bs; 1852 } 1853 } 1854 } 1855 goto noinsert2; 1856 } 1857 } 1858 if (nonew == 1) goto noinsert2; 1859 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1860 if (A->structure_only) { 1861 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1862 } else { 1863 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1864 } 1865 N = nrow++ - 1; high++; 1866 /* shift up all the later entries in this row */ 1867 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1868 rp[i] = col; 1869 if (!A->structure_only) { 1870 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1871 bap = ap + bs2*i; 1872 if (roworiented) { 1873 for (ii=0; ii<bs; ii++,value+=stepval) { 1874 for (jj=ii; jj<bs2; jj+=bs) { 1875 bap[jj] = *value++; 1876 } 1877 } 1878 } else { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=0; jj<bs; jj++) { 1881 *bap++ = *value++; 1882 } 1883 } 1884 } 1885 } 1886 noinsert2:; 1887 low = i; 1888 } 1889 ailen[row] = nrow; 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1895 { 1896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1897 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1898 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1899 PetscErrorCode ierr; 1900 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1901 MatScalar *aa = a->a,*ap; 1902 PetscReal ratio=0.6; 1903 1904 PetscFunctionBegin; 1905 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1906 1907 if (m) rmax = ailen[0]; 1908 for (i=1; i<mbs; i++) { 1909 /* move each row back by the amount of empty slots (fshift) before it*/ 1910 fshift += imax[i-1] - ailen[i-1]; 1911 rmax = PetscMax(rmax,ailen[i]); 1912 if (fshift) { 1913 ip = aj + ai[i]; 1914 ap = aa + bs2*ai[i]; 1915 N = ailen[i]; 1916 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1917 if (!A->structure_only) { 1918 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1919 } 1920 } 1921 ai[i] = ai[i-1] + ailen[i-1]; 1922 } 1923 if (mbs) { 1924 fshift += imax[mbs-1] - ailen[mbs-1]; 1925 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1926 } 1927 1928 /* reset ilen and imax for each row */ 1929 a->nonzerorowcnt = 0; 1930 if (A->structure_only) { 1931 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1932 } else { /* !A->structure_only */ 1933 for (i=0; i<mbs; i++) { 1934 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1935 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1936 } 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1951 1952 A->info.mallocs += a->reallocs; 1953 a->reallocs = 0; 1954 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1955 a->rmax = rmax; 1956 1957 if (!A->structure_only) { 1958 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 /* 1964 This function returns an array of flags which indicate the locations of contiguous 1965 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1966 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1967 Assume: sizes should be long enough to hold all the values. 1968 */ 1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1970 { 1971 PetscInt i,j,k,row; 1972 PetscBool flg; 1973 1974 PetscFunctionBegin; 1975 for (i=0,j=0; i<n; j++) { 1976 row = idx[i]; 1977 if (row%bs!=0) { /* Not the begining of a block */ 1978 sizes[j] = 1; 1979 i++; 1980 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1981 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1982 i++; 1983 } else { /* Begining of the block, so check if the complete block exists */ 1984 flg = PETSC_TRUE; 1985 for (k=1; k<bs; k++) { 1986 if (row+k != idx[i+k]) { /* break in the block */ 1987 flg = PETSC_FALSE; 1988 break; 1989 } 1990 } 1991 if (flg) { /* No break in the bs */ 1992 sizes[j] = bs; 1993 i += bs; 1994 } else { 1995 sizes[j] = 1; 1996 i++; 1997 } 1998 } 1999 } 2000 *bs_max = j; 2001 PetscFunctionReturn(0); 2002 } 2003 2004 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2005 { 2006 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2007 PetscErrorCode ierr; 2008 PetscInt i,j,k,count,*rows; 2009 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2010 PetscScalar zero = 0.0; 2011 MatScalar *aa; 2012 const PetscScalar *xx; 2013 PetscScalar *bb; 2014 2015 PetscFunctionBegin; 2016 /* fix right hand side if needed */ 2017 if (x && b) { 2018 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2019 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2020 for (i=0; i<is_n; i++) { 2021 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2022 } 2023 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2024 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2025 } 2026 2027 /* Make a copy of the IS and sort it */ 2028 /* allocate memory for rows,sizes */ 2029 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2030 2031 /* copy IS values to rows, and sort them */ 2032 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2033 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2034 2035 if (baij->keepnonzeropattern) { 2036 for (i=0; i<is_n; i++) sizes[i] = 1; 2037 bs_max = is_n; 2038 } else { 2039 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2040 A->nonzerostate++; 2041 } 2042 2043 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2044 row = rows[j]; 2045 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2046 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2047 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2048 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2049 if (diag != (PetscScalar)0.0) { 2050 if (baij->ilen[row/bs] > 0) { 2051 baij->ilen[row/bs] = 1; 2052 baij->j[baij->i[row/bs]] = row/bs; 2053 2054 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2055 } 2056 /* Now insert all the diagonal values for this bs */ 2057 for (k=0; k<bs; k++) { 2058 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 } else { /* (diag == 0.0) */ 2061 baij->ilen[row/bs] = 0; 2062 } /* end (diag == 0.0) */ 2063 } else { /* (sizes[i] != bs) */ 2064 #if defined(PETSC_USE_DEBUG) 2065 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2066 #endif 2067 for (k=0; k<count; k++) { 2068 aa[0] = zero; 2069 aa += bs; 2070 } 2071 if (diag != (PetscScalar)0.0) { 2072 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2073 } 2074 } 2075 } 2076 2077 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2083 { 2084 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2085 PetscErrorCode ierr; 2086 PetscInt i,j,k,count; 2087 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2088 PetscScalar zero = 0.0; 2089 MatScalar *aa; 2090 const PetscScalar *xx; 2091 PetscScalar *bb; 2092 PetscBool *zeroed,vecs = PETSC_FALSE; 2093 2094 PetscFunctionBegin; 2095 /* fix right hand side if needed */ 2096 if (x && b) { 2097 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2098 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2099 vecs = PETSC_TRUE; 2100 } 2101 2102 /* zero the columns */ 2103 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2104 for (i=0; i<is_n; i++) { 2105 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2106 zeroed[is_idx[i]] = PETSC_TRUE; 2107 } 2108 for (i=0; i<A->rmap->N; i++) { 2109 if (!zeroed[i]) { 2110 row = i/bs; 2111 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2112 for (k=0; k<bs; k++) { 2113 col = bs*baij->j[j] + k; 2114 if (zeroed[col]) { 2115 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2116 if (vecs) bb[i] -= aa[0]*xx[col]; 2117 aa[0] = 0.0; 2118 } 2119 } 2120 } 2121 } else if (vecs) bb[i] = diag*xx[i]; 2122 } 2123 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2124 if (vecs) { 2125 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2126 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2127 } 2128 2129 /* zero the rows */ 2130 for (i=0; i<is_n; i++) { 2131 row = is_idx[i]; 2132 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2133 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2134 for (k=0; k<count; k++) { 2135 aa[0] = zero; 2136 aa += bs; 2137 } 2138 if (diag != (PetscScalar)0.0) { 2139 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 } 2142 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2147 { 2148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2149 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2150 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2151 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2152 PetscErrorCode ierr; 2153 PetscInt ridx,cidx,bs2=a->bs2; 2154 PetscBool roworiented=a->roworiented; 2155 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2156 2157 PetscFunctionBegin; 2158 for (k=0; k<m; k++) { /* loop over added rows */ 2159 row = im[k]; 2160 brow = row/bs; 2161 if (row < 0) continue; 2162 #if defined(PETSC_USE_DEBUG) 2163 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2164 #endif 2165 rp = aj + ai[brow]; 2166 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2167 rmax = imax[brow]; 2168 nrow = ailen[brow]; 2169 low = 0; 2170 high = nrow; 2171 for (l=0; l<n; l++) { /* loop over added columns */ 2172 if (in[l] < 0) continue; 2173 #if defined(PETSC_USE_DEBUG) 2174 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2175 #endif 2176 col = in[l]; bcol = col/bs; 2177 ridx = row % bs; cidx = col % bs; 2178 if (!A->structure_only) { 2179 if (roworiented) { 2180 value = v[l + k*n]; 2181 } else { 2182 value = v[k + l*m]; 2183 } 2184 } 2185 if (col <= lastcol) low = 0; else high = nrow; 2186 lastcol = col; 2187 while (high-low > 7) { 2188 t = (low+high)/2; 2189 if (rp[t] > bcol) high = t; 2190 else low = t; 2191 } 2192 for (i=low; i<high; i++) { 2193 if (rp[i] > bcol) break; 2194 if (rp[i] == bcol) { 2195 bap = ap + bs2*i + bs*cidx + ridx; 2196 if (!A->structure_only) { 2197 if (is == ADD_VALUES) *bap += value; 2198 else *bap = value; 2199 } 2200 goto noinsert1; 2201 } 2202 } 2203 if (nonew == 1) goto noinsert1; 2204 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2205 if (A->structure_only) { 2206 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2207 } else { 2208 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2209 } 2210 N = nrow++ - 1; high++; 2211 /* shift up all the later entries in this row */ 2212 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2213 rp[i] = bcol; 2214 if (!A->structure_only) { 2215 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2216 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2217 ap[bs2*i + bs*cidx + ridx] = value; 2218 } 2219 a->nz++; 2220 A->nonzerostate++; 2221 noinsert1:; 2222 low = i; 2223 } 2224 ailen[brow] = nrow; 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2230 { 2231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2232 Mat outA; 2233 PetscErrorCode ierr; 2234 PetscBool row_identity,col_identity; 2235 2236 PetscFunctionBegin; 2237 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2238 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2239 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2240 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2241 2242 outA = inA; 2243 inA->factortype = MAT_FACTOR_LU; 2244 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2245 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2246 2247 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2248 2249 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2250 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2251 a->row = row; 2252 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2254 a->col = col; 2255 2256 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2257 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2258 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2259 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2260 2261 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2262 if (!a->solve_work) { 2263 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2264 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2265 } 2266 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2271 { 2272 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2273 PetscInt i,nz,mbs; 2274 2275 PetscFunctionBegin; 2276 nz = baij->maxnz; 2277 mbs = baij->mbs; 2278 for (i=0; i<nz; i++) { 2279 baij->j[i] = indices[i]; 2280 } 2281 baij->nz = nz; 2282 for (i=0; i<mbs; i++) { 2283 baij->ilen[i] = baij->imax[i]; 2284 } 2285 PetscFunctionReturn(0); 2286 } 2287 2288 /*@ 2289 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2290 in the matrix. 2291 2292 Input Parameters: 2293 + mat - the SeqBAIJ matrix 2294 - indices - the column indices 2295 2296 Level: advanced 2297 2298 Notes: 2299 This can be called if you have precomputed the nonzero structure of the 2300 matrix and want to provide it to the matrix object to improve the performance 2301 of the MatSetValues() operation. 2302 2303 You MUST have set the correct numbers of nonzeros per row in the call to 2304 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2305 2306 MUST be called before any calls to MatSetValues(); 2307 2308 @*/ 2309 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2310 { 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2315 PetscValidPointer(indices,2); 2316 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2321 { 2322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2323 PetscErrorCode ierr; 2324 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2325 PetscReal atmp; 2326 PetscScalar *x,zero = 0.0; 2327 MatScalar *aa; 2328 PetscInt ncols,brow,krow,kcol; 2329 2330 PetscFunctionBegin; 2331 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2332 bs = A->rmap->bs; 2333 aa = a->a; 2334 ai = a->i; 2335 aj = a->j; 2336 mbs = a->mbs; 2337 2338 ierr = VecSet(v,zero);CHKERRQ(ierr); 2339 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2340 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2341 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2342 for (i=0; i<mbs; i++) { 2343 ncols = ai[1] - ai[0]; ai++; 2344 brow = bs*i; 2345 for (j=0; j<ncols; j++) { 2346 for (kcol=0; kcol<bs; kcol++) { 2347 for (krow=0; krow<bs; krow++) { 2348 atmp = PetscAbsScalar(*aa);aa++; 2349 row = brow + krow; /* row index */ 2350 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2351 } 2352 } 2353 aj++; 2354 } 2355 } 2356 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 /* If the two matrices have the same copy implementation, use fast copy. */ 2366 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2369 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2370 2371 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2372 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2373 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2374 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2375 } else { 2376 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2391 { 2392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2393 2394 PetscFunctionBegin; 2395 *array = a->a; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2400 { 2401 PetscFunctionBegin; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2406 { 2407 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2408 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2409 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 /* Set the number of nonzeros in the new matrix */ 2414 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2419 { 2420 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2421 PetscErrorCode ierr; 2422 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2423 PetscBLASInt one=1; 2424 2425 PetscFunctionBegin; 2426 if (str == SAME_NONZERO_PATTERN) { 2427 PetscScalar alpha = a; 2428 PetscBLASInt bnz; 2429 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2430 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2431 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2432 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2433 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2434 } else { 2435 Mat B; 2436 PetscInt *nnz; 2437 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2438 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2439 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2440 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2441 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2442 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2443 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2444 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2445 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2446 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2447 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2448 ierr = PetscFree(nnz);CHKERRQ(ierr); 2449 } 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2454 { 2455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2456 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2457 MatScalar *aa = a->a; 2458 2459 PetscFunctionBegin; 2460 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2465 { 2466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2467 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2468 MatScalar *aa = a->a; 2469 2470 PetscFunctionBegin; 2471 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 /* 2476 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2477 */ 2478 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2479 { 2480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2481 PetscErrorCode ierr; 2482 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2483 PetscInt nz = a->i[m],row,*jj,mr,col; 2484 2485 PetscFunctionBegin; 2486 *nn = n; 2487 if (!ia) PetscFunctionReturn(0); 2488 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2489 else { 2490 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2491 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2492 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2493 jj = a->j; 2494 for (i=0; i<nz; i++) { 2495 collengths[jj[i]]++; 2496 } 2497 cia[0] = oshift; 2498 for (i=0; i<n; i++) { 2499 cia[i+1] = cia[i] + collengths[i]; 2500 } 2501 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2502 jj = a->j; 2503 for (row=0; row<m; row++) { 2504 mr = a->i[row+1] - a->i[row]; 2505 for (i=0; i<mr; i++) { 2506 col = *jj++; 2507 2508 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2509 } 2510 } 2511 ierr = PetscFree(collengths);CHKERRQ(ierr); 2512 *ia = cia; *ja = cja; 2513 } 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2518 { 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 if (!ia) PetscFunctionReturn(0); 2523 ierr = PetscFree(*ia);CHKERRQ(ierr); 2524 ierr = PetscFree(*ja);CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 /* 2529 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2530 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2531 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2532 */ 2533 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2534 { 2535 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2536 PetscErrorCode ierr; 2537 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2538 PetscInt nz = a->i[m],row,*jj,mr,col; 2539 PetscInt *cspidx; 2540 2541 PetscFunctionBegin; 2542 *nn = n; 2543 if (!ia) PetscFunctionReturn(0); 2544 2545 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2546 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2547 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2549 jj = a->j; 2550 for (i=0; i<nz; i++) { 2551 collengths[jj[i]]++; 2552 } 2553 cia[0] = oshift; 2554 for (i=0; i<n; i++) { 2555 cia[i+1] = cia[i] + collengths[i]; 2556 } 2557 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2558 jj = a->j; 2559 for (row=0; row<m; row++) { 2560 mr = a->i[row+1] - a->i[row]; 2561 for (i=0; i<mr; i++) { 2562 col = *jj++; 2563 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2564 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2565 } 2566 } 2567 ierr = PetscFree(collengths);CHKERRQ(ierr); 2568 *ia = cia; *ja = cja; 2569 *spidx = cspidx; 2570 PetscFunctionReturn(0); 2571 } 2572 2573 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2574 { 2575 PetscErrorCode ierr; 2576 2577 PetscFunctionBegin; 2578 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2579 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2584 { 2585 PetscErrorCode ierr; 2586 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2587 2588 PetscFunctionBegin; 2589 if (!Y->preallocated || !aij->nz) { 2590 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2591 } 2592 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2593 PetscFunctionReturn(0); 2594 } 2595 2596 /* -------------------------------------------------------------------*/ 2597 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2598 MatGetRow_SeqBAIJ, 2599 MatRestoreRow_SeqBAIJ, 2600 MatMult_SeqBAIJ_N, 2601 /* 4*/ MatMultAdd_SeqBAIJ_N, 2602 MatMultTranspose_SeqBAIJ, 2603 MatMultTransposeAdd_SeqBAIJ, 2604 0, 2605 0, 2606 0, 2607 /* 10*/ 0, 2608 MatLUFactor_SeqBAIJ, 2609 0, 2610 0, 2611 MatTranspose_SeqBAIJ, 2612 /* 15*/ MatGetInfo_SeqBAIJ, 2613 MatEqual_SeqBAIJ, 2614 MatGetDiagonal_SeqBAIJ, 2615 MatDiagonalScale_SeqBAIJ, 2616 MatNorm_SeqBAIJ, 2617 /* 20*/ 0, 2618 MatAssemblyEnd_SeqBAIJ, 2619 MatSetOption_SeqBAIJ, 2620 MatZeroEntries_SeqBAIJ, 2621 /* 24*/ MatZeroRows_SeqBAIJ, 2622 0, 2623 0, 2624 0, 2625 0, 2626 /* 29*/ MatSetUp_SeqBAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /* 34*/ MatDuplicate_SeqBAIJ, 2632 0, 2633 0, 2634 MatILUFactor_SeqBAIJ, 2635 0, 2636 /* 39*/ MatAXPY_SeqBAIJ, 2637 MatCreateSubMatrices_SeqBAIJ, 2638 MatIncreaseOverlap_SeqBAIJ, 2639 MatGetValues_SeqBAIJ, 2640 MatCopy_SeqBAIJ, 2641 /* 44*/ 0, 2642 MatScale_SeqBAIJ, 2643 MatShift_SeqBAIJ, 2644 0, 2645 MatZeroRowsColumns_SeqBAIJ, 2646 /* 49*/ 0, 2647 MatGetRowIJ_SeqBAIJ, 2648 MatRestoreRowIJ_SeqBAIJ, 2649 MatGetColumnIJ_SeqBAIJ, 2650 MatRestoreColumnIJ_SeqBAIJ, 2651 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2652 0, 2653 0, 2654 0, 2655 MatSetValuesBlocked_SeqBAIJ, 2656 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2657 MatDestroy_SeqBAIJ, 2658 MatView_SeqBAIJ, 2659 0, 2660 0, 2661 /* 64*/ 0, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2667 0, 2668 MatConvert_Basic, 2669 0, 2670 0, 2671 /* 74*/ 0, 2672 MatFDColoringApply_BAIJ, 2673 0, 2674 0, 2675 0, 2676 /* 79*/ 0, 2677 0, 2678 0, 2679 0, 2680 MatLoad_SeqBAIJ, 2681 /* 84*/ 0, 2682 0, 2683 0, 2684 0, 2685 0, 2686 /* 89*/ 0, 2687 0, 2688 0, 2689 0, 2690 0, 2691 /* 94*/ 0, 2692 0, 2693 0, 2694 0, 2695 0, 2696 /* 99*/ 0, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /*104*/ 0, 2702 MatRealPart_SeqBAIJ, 2703 MatImaginaryPart_SeqBAIJ, 2704 0, 2705 0, 2706 /*109*/ 0, 2707 0, 2708 0, 2709 0, 2710 MatMissingDiagonal_SeqBAIJ, 2711 /*114*/ 0, 2712 0, 2713 0, 2714 0, 2715 0, 2716 /*119*/ 0, 2717 0, 2718 MatMultHermitianTranspose_SeqBAIJ, 2719 MatMultHermitianTransposeAdd_SeqBAIJ, 2720 0, 2721 /*124*/ 0, 2722 0, 2723 MatInvertBlockDiagonal_SeqBAIJ, 2724 0, 2725 0, 2726 /*129*/ 0, 2727 0, 2728 0, 2729 0, 2730 0, 2731 /*134*/ 0, 2732 0, 2733 0, 2734 0, 2735 0, 2736 /*139*/ MatSetBlockSizes_Default, 2737 0, 2738 0, 2739 MatFDColoringSetUp_SeqXAIJ, 2740 0, 2741 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2742 MatDestroySubMatrices_SeqBAIJ 2743 }; 2744 2745 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2746 { 2747 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2748 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2749 PetscErrorCode ierr; 2750 2751 PetscFunctionBegin; 2752 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2753 2754 /* allocate space for values if not already there */ 2755 if (!aij->saved_values) { 2756 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2757 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2758 } 2759 2760 /* copy values over */ 2761 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2766 { 2767 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2768 PetscErrorCode ierr; 2769 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2770 2771 PetscFunctionBegin; 2772 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2773 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2774 2775 /* copy values over */ 2776 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2782 2783 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2784 { 2785 Mat_SeqBAIJ *b; 2786 PetscErrorCode ierr; 2787 PetscInt i,mbs,nbs,bs2; 2788 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2789 2790 PetscFunctionBegin; 2791 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2792 if (nz == MAT_SKIP_ALLOCATION) { 2793 skipallocation = PETSC_TRUE; 2794 nz = 0; 2795 } 2796 2797 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2798 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2799 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2800 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2801 2802 B->preallocated = PETSC_TRUE; 2803 2804 mbs = B->rmap->n/bs; 2805 nbs = B->cmap->n/bs; 2806 bs2 = bs*bs; 2807 2808 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2809 2810 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2811 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2812 if (nnz) { 2813 for (i=0; i<mbs; i++) { 2814 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2815 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2816 } 2817 } 2818 2819 b = (Mat_SeqBAIJ*)B->data; 2820 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2821 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2822 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2823 2824 if (!flg) { 2825 switch (bs) { 2826 case 1: 2827 B->ops->mult = MatMult_SeqBAIJ_1; 2828 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2829 break; 2830 case 2: 2831 B->ops->mult = MatMult_SeqBAIJ_2; 2832 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2833 break; 2834 case 3: 2835 B->ops->mult = MatMult_SeqBAIJ_3; 2836 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2837 break; 2838 case 4: 2839 B->ops->mult = MatMult_SeqBAIJ_4; 2840 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2841 break; 2842 case 5: 2843 B->ops->mult = MatMult_SeqBAIJ_5; 2844 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2845 break; 2846 case 6: 2847 B->ops->mult = MatMult_SeqBAIJ_6; 2848 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2849 break; 2850 case 7: 2851 B->ops->mult = MatMult_SeqBAIJ_7; 2852 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2853 break; 2854 case 9: 2855 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2856 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2857 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2858 #else 2859 B->ops->mult = MatMult_SeqBAIJ_N; 2860 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2861 #endif 2862 break; 2863 case 11: 2864 B->ops->mult = MatMult_SeqBAIJ_11; 2865 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2866 break; 2867 case 15: 2868 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2869 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2870 break; 2871 default: 2872 B->ops->mult = MatMult_SeqBAIJ_N; 2873 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2874 break; 2875 } 2876 } 2877 B->ops->sor = MatSOR_SeqBAIJ; 2878 b->mbs = mbs; 2879 b->nbs = nbs; 2880 if (!skipallocation) { 2881 if (!b->imax) { 2882 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2883 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2884 2885 b->free_imax_ilen = PETSC_TRUE; 2886 } 2887 /* b->ilen will count nonzeros in each block row so far. */ 2888 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2889 if (!nnz) { 2890 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2891 else if (nz < 0) nz = 1; 2892 nz = PetscMin(nz,nbs); 2893 for (i=0; i<mbs; i++) b->imax[i] = nz; 2894 nz = nz*mbs; 2895 } else { 2896 nz = 0; 2897 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2898 } 2899 2900 /* allocate the matrix space */ 2901 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2902 if (B->structure_only) { 2903 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2904 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2905 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2906 } else { 2907 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2908 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2909 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2910 } 2911 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2912 2913 if (B->structure_only) { 2914 b->singlemalloc = PETSC_FALSE; 2915 b->free_a = PETSC_FALSE; 2916 } else { 2917 b->singlemalloc = PETSC_TRUE; 2918 b->free_a = PETSC_TRUE; 2919 } 2920 b->free_ij = PETSC_TRUE; 2921 2922 b->i[0] = 0; 2923 for (i=1; i<mbs+1; i++) { 2924 b->i[i] = b->i[i-1] + b->imax[i-1]; 2925 } 2926 2927 } else { 2928 b->free_a = PETSC_FALSE; 2929 b->free_ij = PETSC_FALSE; 2930 } 2931 2932 b->bs2 = bs2; 2933 b->mbs = mbs; 2934 b->nz = 0; 2935 b->maxnz = nz; 2936 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2937 B->was_assembled = PETSC_FALSE; 2938 B->assembled = PETSC_FALSE; 2939 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2940 PetscFunctionReturn(0); 2941 } 2942 2943 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2944 { 2945 PetscInt i,m,nz,nz_max=0,*nnz; 2946 PetscScalar *values=0; 2947 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2948 PetscErrorCode ierr; 2949 2950 PetscFunctionBegin; 2951 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2952 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2953 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2954 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2956 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2957 m = B->rmap->n/bs; 2958 2959 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2960 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2961 for (i=0; i<m; i++) { 2962 nz = ii[i+1]- ii[i]; 2963 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2964 nz_max = PetscMax(nz_max, nz); 2965 nnz[i] = nz; 2966 } 2967 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2968 ierr = PetscFree(nnz);CHKERRQ(ierr); 2969 2970 values = (PetscScalar*)V; 2971 if (!values) { 2972 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2973 } 2974 for (i=0; i<m; i++) { 2975 PetscInt ncols = ii[i+1] - ii[i]; 2976 const PetscInt *icols = jj + ii[i]; 2977 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2978 if (!roworiented) { 2979 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2980 } else { 2981 PetscInt j; 2982 for (j=0; j<ncols; j++) { 2983 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2984 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2985 } 2986 } 2987 } 2988 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2989 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2990 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2991 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2992 PetscFunctionReturn(0); 2993 } 2994 2995 /*MC 2996 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 2997 block sparse compressed row format. 2998 2999 Options Database Keys: 3000 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3001 3002 Level: beginner 3003 3004 .seealso: MatCreateSeqBAIJ() 3005 M*/ 3006 3007 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3008 3009 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3010 { 3011 PetscErrorCode ierr; 3012 PetscMPIInt size; 3013 Mat_SeqBAIJ *b; 3014 3015 PetscFunctionBegin; 3016 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3017 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3018 3019 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3020 B->data = (void*)b; 3021 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3022 3023 b->row = 0; 3024 b->col = 0; 3025 b->icol = 0; 3026 b->reallocs = 0; 3027 b->saved_values = 0; 3028 3029 b->roworiented = PETSC_TRUE; 3030 b->nonew = 0; 3031 b->diag = 0; 3032 B->spptr = 0; 3033 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3034 b->keepnonzeropattern = PETSC_FALSE; 3035 3036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3045 #if defined(PETSC_HAVE_HYPRE) 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3047 #endif 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3051 PetscFunctionReturn(0); 3052 } 3053 3054 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3055 { 3056 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3057 PetscErrorCode ierr; 3058 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3059 3060 PetscFunctionBegin; 3061 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3062 3063 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3064 c->imax = a->imax; 3065 c->ilen = a->ilen; 3066 c->free_imax_ilen = PETSC_FALSE; 3067 } else { 3068 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3069 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3070 for (i=0; i<mbs; i++) { 3071 c->imax[i] = a->imax[i]; 3072 c->ilen[i] = a->ilen[i]; 3073 } 3074 c->free_imax_ilen = PETSC_TRUE; 3075 } 3076 3077 /* allocate the matrix space */ 3078 if (mallocmatspace) { 3079 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3080 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3081 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3082 3083 c->i = a->i; 3084 c->j = a->j; 3085 c->singlemalloc = PETSC_FALSE; 3086 c->free_a = PETSC_TRUE; 3087 c->free_ij = PETSC_FALSE; 3088 c->parent = A; 3089 C->preallocated = PETSC_TRUE; 3090 C->assembled = PETSC_TRUE; 3091 3092 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3093 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3094 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3095 } else { 3096 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3097 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3098 3099 c->singlemalloc = PETSC_TRUE; 3100 c->free_a = PETSC_TRUE; 3101 c->free_ij = PETSC_TRUE; 3102 3103 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3104 if (mbs > 0) { 3105 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3106 if (cpvalues == MAT_COPY_VALUES) { 3107 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3108 } else { 3109 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3110 } 3111 } 3112 C->preallocated = PETSC_TRUE; 3113 C->assembled = PETSC_TRUE; 3114 } 3115 } 3116 3117 c->roworiented = a->roworiented; 3118 c->nonew = a->nonew; 3119 3120 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3121 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3122 3123 c->bs2 = a->bs2; 3124 c->mbs = a->mbs; 3125 c->nbs = a->nbs; 3126 3127 if (a->diag) { 3128 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3129 c->diag = a->diag; 3130 c->free_diag = PETSC_FALSE; 3131 } else { 3132 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3133 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3134 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3135 c->free_diag = PETSC_TRUE; 3136 } 3137 } else c->diag = 0; 3138 3139 c->nz = a->nz; 3140 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3141 c->solve_work = NULL; 3142 c->mult_work = NULL; 3143 c->sor_workt = NULL; 3144 c->sor_work = NULL; 3145 3146 c->compressedrow.use = a->compressedrow.use; 3147 c->compressedrow.nrows = a->compressedrow.nrows; 3148 if (a->compressedrow.use) { 3149 i = a->compressedrow.nrows; 3150 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3151 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3152 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3153 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3154 } else { 3155 c->compressedrow.use = PETSC_FALSE; 3156 c->compressedrow.i = NULL; 3157 c->compressedrow.rindex = NULL; 3158 } 3159 C->nonzerostate = A->nonzerostate; 3160 3161 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3162 PetscFunctionReturn(0); 3163 } 3164 3165 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3166 { 3167 PetscErrorCode ierr; 3168 3169 PetscFunctionBegin; 3170 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3171 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3172 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3173 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3174 PetscFunctionReturn(0); 3175 } 3176 3177 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3178 { 3179 Mat_SeqBAIJ *a; 3180 PetscErrorCode ierr; 3181 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3182 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3183 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3184 PetscInt *masked,nmask,tmp,bs2,ishift; 3185 PetscMPIInt size; 3186 int fd; 3187 PetscScalar *aa; 3188 MPI_Comm comm; 3189 PetscBool isbinary; 3190 3191 PetscFunctionBegin; 3192 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3193 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3194 3195 /* force binary viewer to load .info file if it has not yet done so */ 3196 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3197 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3198 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3199 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3200 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3201 if (bs < 0) bs = 1; 3202 bs2 = bs*bs; 3203 3204 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3205 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3206 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3207 ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3208 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3209 M = header[1]; N = header[2]; nz = header[3]; 3210 3211 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3212 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3213 3214 /* 3215 This code adds extra rows to make sure the number of rows is 3216 divisible by the blocksize 3217 */ 3218 mbs = M/bs; 3219 extra_rows = bs - M + bs*(mbs); 3220 if (extra_rows == bs) extra_rows = 0; 3221 else mbs++; 3222 if (extra_rows) { 3223 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3224 } 3225 3226 /* Set global sizes if not already set */ 3227 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3228 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3229 } else { /* Check if the matrix global sizes are correct */ 3230 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3231 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3232 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3233 } 3234 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3235 } 3236 3237 /* read in row lengths */ 3238 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3239 ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr); 3240 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3241 3242 /* read in column indices */ 3243 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3244 ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3245 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3246 3247 /* loop over row lengths determining block row lengths */ 3248 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3249 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3250 ierr = PetscArrayzero(mask,mbs);CHKERRQ(ierr); 3251 rowcount = 0; 3252 nzcount = 0; 3253 for (i=0; i<mbs; i++) { 3254 nmask = 0; 3255 for (j=0; j<bs; j++) { 3256 kmax = rowlengths[rowcount]; 3257 for (k=0; k<kmax; k++) { 3258 tmp = jj[nzcount++]/bs; 3259 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3260 } 3261 rowcount++; 3262 } 3263 browlengths[i] += nmask; 3264 /* zero out the mask elements we set */ 3265 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3266 } 3267 3268 /* Do preallocation */ 3269 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3270 a = (Mat_SeqBAIJ*)newmat->data; 3271 3272 /* set matrix "i" values */ 3273 a->i[0] = 0; 3274 for (i=1; i<= mbs; i++) { 3275 a->i[i] = a->i[i-1] + browlengths[i-1]; 3276 a->ilen[i-1] = browlengths[i-1]; 3277 } 3278 a->nz = 0; 3279 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3280 3281 /* read in nonzero values */ 3282 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3283 ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3284 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3285 3286 /* set "a" and "j" values into matrix */ 3287 nzcount = 0; jcount = 0; 3288 for (i=0; i<mbs; i++) { 3289 nzcountb = nzcount; 3290 nmask = 0; 3291 for (j=0; j<bs; j++) { 3292 kmax = rowlengths[i*bs+j]; 3293 for (k=0; k<kmax; k++) { 3294 tmp = jj[nzcount++]/bs; 3295 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3296 } 3297 } 3298 /* sort the masked values */ 3299 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3300 3301 /* set "j" values into matrix */ 3302 maskcount = 1; 3303 for (j=0; j<nmask; j++) { 3304 a->j[jcount++] = masked[j]; 3305 mask[masked[j]] = maskcount++; 3306 } 3307 /* set "a" values into matrix */ 3308 ishift = bs2*a->i[i]; 3309 for (j=0; j<bs; j++) { 3310 kmax = rowlengths[i*bs+j]; 3311 for (k=0; k<kmax; k++) { 3312 tmp = jj[nzcountb]/bs; 3313 block = mask[tmp] - 1; 3314 point = jj[nzcountb] - bs*tmp; 3315 idx = ishift + bs2*block + j + bs*point; 3316 a->a[idx] = (MatScalar)aa[nzcountb++]; 3317 } 3318 } 3319 /* zero out the mask elements we set */ 3320 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3321 } 3322 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3323 3324 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3325 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3326 ierr = PetscFree(aa);CHKERRQ(ierr); 3327 ierr = PetscFree(jj);CHKERRQ(ierr); 3328 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3329 3330 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3331 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3332 PetscFunctionReturn(0); 3333 } 3334 3335 /*@C 3336 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3337 compressed row) format. For good matrix assembly performance the 3338 user should preallocate the matrix storage by setting the parameter nz 3339 (or the array nnz). By setting these parameters accurately, performance 3340 during matrix assembly can be increased by more than a factor of 50. 3341 3342 Collective 3343 3344 Input Parameters: 3345 + comm - MPI communicator, set to PETSC_COMM_SELF 3346 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3347 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3348 . m - number of rows 3349 . n - number of columns 3350 . nz - number of nonzero blocks per block row (same for all rows) 3351 - nnz - array containing the number of nonzero blocks in the various block rows 3352 (possibly different for each block row) or NULL 3353 3354 Output Parameter: 3355 . A - the matrix 3356 3357 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3358 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3359 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3360 3361 Options Database Keys: 3362 . -mat_no_unroll - uses code that does not unroll the loops in the 3363 block calculations (much slower) 3364 . -mat_block_size - size of the blocks to use 3365 3366 Level: intermediate 3367 3368 Notes: 3369 The number of rows and columns must be divisible by blocksize. 3370 3371 If the nnz parameter is given then the nz parameter is ignored 3372 3373 A nonzero block is any block that as 1 or more nonzeros in it 3374 3375 The block AIJ format is fully compatible with standard Fortran 77 3376 storage. That is, the stored row and column indices can begin at 3377 either one (as in Fortran) or zero. See the users' manual for details. 3378 3379 Specify the preallocated storage with either nz or nnz (not both). 3380 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3381 allocation. See Users-Manual: ch_mat for details. 3382 matrices. 3383 3384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3385 @*/ 3386 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3387 { 3388 PetscErrorCode ierr; 3389 3390 PetscFunctionBegin; 3391 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3392 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3393 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3394 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3395 PetscFunctionReturn(0); 3396 } 3397 3398 /*@C 3399 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3400 per row in the matrix. For good matrix assembly performance the 3401 user should preallocate the matrix storage by setting the parameter nz 3402 (or the array nnz). By setting these parameters accurately, performance 3403 during matrix assembly can be increased by more than a factor of 50. 3404 3405 Collective 3406 3407 Input Parameters: 3408 + B - the matrix 3409 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3410 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3411 . nz - number of block nonzeros per block row (same for all rows) 3412 - nnz - array containing the number of block nonzeros in the various block rows 3413 (possibly different for each block row) or NULL 3414 3415 Options Database Keys: 3416 . -mat_no_unroll - uses code that does not unroll the loops in the 3417 block calculations (much slower) 3418 . -mat_block_size - size of the blocks to use 3419 3420 Level: intermediate 3421 3422 Notes: 3423 If the nnz parameter is given then the nz parameter is ignored 3424 3425 You can call MatGetInfo() to get information on how effective the preallocation was; 3426 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3427 You can also run with the option -info and look for messages with the string 3428 malloc in them to see if additional memory allocation was needed. 3429 3430 The block AIJ format is fully compatible with standard Fortran 77 3431 storage. That is, the stored row and column indices can begin at 3432 either one (as in Fortran) or zero. See the users' manual for details. 3433 3434 Specify the preallocated storage with either nz or nnz (not both). 3435 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3436 allocation. See Users-Manual: ch_mat for details. 3437 3438 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3439 @*/ 3440 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3441 { 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3446 PetscValidType(B,1); 3447 PetscValidLogicalCollectiveInt(B,bs,2); 3448 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3449 PetscFunctionReturn(0); 3450 } 3451 3452 /*@C 3453 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3454 (the default sequential PETSc format). 3455 3456 Collective 3457 3458 Input Parameters: 3459 + B - the matrix 3460 . i - the indices into j for the start of each local row (starts with zero) 3461 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3462 - v - optional values in the matrix 3463 3464 Level: developer 3465 3466 Notes: 3467 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3468 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3469 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3470 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3471 block column and the second index is over columns within a block. 3472 3473 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3474 @*/ 3475 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3476 { 3477 PetscErrorCode ierr; 3478 3479 PetscFunctionBegin; 3480 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3481 PetscValidType(B,1); 3482 PetscValidLogicalCollectiveInt(B,bs,2); 3483 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3484 PetscFunctionReturn(0); 3485 } 3486 3487 3488 /*@ 3489 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3490 3491 Collective 3492 3493 Input Parameters: 3494 + comm - must be an MPI communicator of size 1 3495 . bs - size of block 3496 . m - number of rows 3497 . n - number of columns 3498 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3499 . j - column indices 3500 - a - matrix values 3501 3502 Output Parameter: 3503 . mat - the matrix 3504 3505 Level: advanced 3506 3507 Notes: 3508 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3509 once the matrix is destroyed 3510 3511 You cannot set new nonzero locations into this matrix, that will generate an error. 3512 3513 The i and j indices are 0 based 3514 3515 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3516 3517 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3518 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3519 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3520 with column-major ordering within blocks. 3521 3522 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3523 3524 @*/ 3525 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3526 { 3527 PetscErrorCode ierr; 3528 PetscInt ii; 3529 Mat_SeqBAIJ *baij; 3530 3531 PetscFunctionBegin; 3532 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3533 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3534 3535 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3536 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3537 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3538 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3539 baij = (Mat_SeqBAIJ*)(*mat)->data; 3540 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3541 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3542 3543 baij->i = i; 3544 baij->j = j; 3545 baij->a = a; 3546 3547 baij->singlemalloc = PETSC_FALSE; 3548 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3549 baij->free_a = PETSC_FALSE; 3550 baij->free_ij = PETSC_FALSE; 3551 3552 for (ii=0; ii<m; ii++) { 3553 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3554 #if defined(PETSC_USE_DEBUG) 3555 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3556 #endif 3557 } 3558 #if defined(PETSC_USE_DEBUG) 3559 for (ii=0; ii<baij->i[m]; ii++) { 3560 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3561 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3562 } 3563 #endif 3564 3565 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3566 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3567 PetscFunctionReturn(0); 3568 } 3569 3570 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3571 { 3572 PetscErrorCode ierr; 3573 PetscMPIInt size; 3574 3575 PetscFunctionBegin; 3576 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3577 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3578 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3579 } else { 3580 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3581 } 3582 PetscFunctionReturn(0); 3583 } 3584