1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 913 PetscFunctionBegin; 914 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 915 stepval = (n-1)*4; 916 for (k=0; k<m; k++) { /* loop over added rows */ 917 row = im[k]; 918 rp = aj + ai[row]; 919 ap = aa + 16*ai[row]; 920 nrow = ailen[row]; 921 low = 0; 922 high = nrow; 923 for (l=0; l<n; l++) { /* loop over added columns */ 924 col = in[l]; 925 if (col <= lastcol) low = 0; 926 else high = nrow; 927 lastcol = col; 928 value = v + k*(stepval+4 + l)*4; 929 while (high-low > 7) { 930 t = (low+high)/2; 931 if (rp[t] > col) high = t; 932 else low = t; 933 } 934 for (i=low; i<high; i++) { 935 if (rp[i] > col) break; 936 if (rp[i] == col) { 937 bap = ap + 16*i; 938 for (ii=0; ii<4; ii++,value+=stepval) { 939 for (jj=ii; jj<16; jj+=4) { 940 bap[jj] += *value++; 941 } 942 } 943 goto noinsert2; 944 } 945 } 946 N = nrow++ - 1; 947 high++; /* added new column index thus must search to one higher than before */ 948 /* shift up all the later entries in this row */ 949 for (ii=N; ii>=i; ii--) { 950 rp[ii+1] = rp[ii]; 951 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 952 } 953 if (N >= i) { 954 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 955 } 956 rp[i] = col; 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] = *value++; 961 } 962 } 963 noinsert2:; 964 low = i; 965 } 966 ailen[row] = nrow; 967 } 968 PetscFunctionReturnVoid(); 969 } 970 971 #if defined(PETSC_HAVE_FORTRAN_CAPS) 972 #define matsetvalues4_ MATSETVALUES4 973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 974 #define matsetvalues4_ matsetvalues4 975 #endif 976 977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 978 { 979 Mat A = *AA; 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 981 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 982 PetscInt *ai=a->i,*ailen=a->ilen; 983 PetscInt *aj=a->j,brow,bcol; 984 PetscInt ridx,cidx,lastcol = -1; 985 MatScalar *ap,value,*aa=a->a,*bap; 986 987 PetscFunctionBegin; 988 for (k=0; k<m; k++) { /* loop over added rows */ 989 row = im[k]; brow = row/4; 990 rp = aj + ai[brow]; 991 ap = aa + 16*ai[brow]; 992 nrow = ailen[brow]; 993 low = 0; 994 high = nrow; 995 for (l=0; l<n; l++) { /* loop over added columns */ 996 col = in[l]; bcol = col/4; 997 ridx = row % 4; cidx = col % 4; 998 value = v[l + k*n]; 999 if (col <= lastcol) low = 0; 1000 else high = nrow; 1001 lastcol = col; 1002 while (high-low > 7) { 1003 t = (low+high)/2; 1004 if (rp[t] > bcol) high = t; 1005 else low = t; 1006 } 1007 for (i=low; i<high; i++) { 1008 if (rp[i] > bcol) break; 1009 if (rp[i] == bcol) { 1010 bap = ap + 16*i + 4*cidx + ridx; 1011 *bap += value; 1012 goto noinsert1; 1013 } 1014 } 1015 N = nrow++ - 1; 1016 high++; /* added new column thus must search to one higher than before */ 1017 /* shift up all the later entries in this row */ 1018 for (ii=N; ii>=i; ii--) { 1019 rp[ii+1] = rp[ii]; 1020 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1021 } 1022 if (N>=i) { 1023 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1024 } 1025 rp[i] = bcol; 1026 ap[16*i + 4*cidx + ridx] = value; 1027 noinsert1:; 1028 low = i; 1029 } 1030 ailen[brow] = nrow; 1031 } 1032 PetscFunctionReturnVoid(); 1033 } 1034 1035 /* 1036 Checks for missing diagonals 1037 */ 1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1039 { 1040 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1041 PetscErrorCode ierr; 1042 PetscInt *diag,*ii = a->i,i; 1043 1044 PetscFunctionBegin; 1045 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1046 *missing = PETSC_FALSE; 1047 if (A->rmap->n > 0 && !ii) { 1048 *missing = PETSC_TRUE; 1049 if (d) *d = 0; 1050 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1051 } else { 1052 PetscInt n; 1053 n = PetscMin(a->mbs, a->nbs); 1054 diag = a->diag; 1055 for (i=0; i<n; i++) { 1056 if (diag[i] >= ii[i+1]) { 1057 *missing = PETSC_TRUE; 1058 if (d) *d = i; 1059 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1060 break; 1061 } 1062 } 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1068 { 1069 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1070 PetscErrorCode ierr; 1071 PetscInt i,j,m = a->mbs; 1072 1073 PetscFunctionBegin; 1074 if (!a->diag) { 1075 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1076 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1077 a->free_diag = PETSC_TRUE; 1078 } 1079 for (i=0; i<m; i++) { 1080 a->diag[i] = a->i[i+1]; 1081 for (j=a->i[i]; j<a->i[i+1]; j++) { 1082 if (a->j[j] == i) { 1083 a->diag[i] = j; 1084 break; 1085 } 1086 } 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 1092 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1093 { 1094 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1095 PetscErrorCode ierr; 1096 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1097 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1098 1099 PetscFunctionBegin; 1100 *nn = n; 1101 if (!ia) PetscFunctionReturn(0); 1102 if (symmetric) { 1103 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1104 nz = tia[n]; 1105 } else { 1106 tia = a->i; tja = a->j; 1107 } 1108 1109 if (!blockcompressed && bs > 1) { 1110 (*nn) *= bs; 1111 /* malloc & create the natural set of indices */ 1112 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1113 if (n) { 1114 (*ia)[0] = oshift; 1115 for (j=1; j<bs; j++) { 1116 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1117 } 1118 } 1119 1120 for (i=1; i<n; i++) { 1121 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1122 for (j=1; j<bs; j++) { 1123 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1124 } 1125 } 1126 if (n) { 1127 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1128 } 1129 1130 if (inja) { 1131 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1132 cnt = 0; 1133 for (i=0; i<n; i++) { 1134 for (j=0; j<bs; j++) { 1135 for (k=tia[i]; k<tia[i+1]; k++) { 1136 for (l=0; l<bs; l++) { 1137 (*ja)[cnt++] = bs*tja[k] + l; 1138 } 1139 } 1140 } 1141 } 1142 } 1143 1144 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1145 ierr = PetscFree(tia);CHKERRQ(ierr); 1146 ierr = PetscFree(tja);CHKERRQ(ierr); 1147 } 1148 } else if (oshift == 1) { 1149 if (symmetric) { 1150 nz = tia[A->rmap->n/bs]; 1151 /* add 1 to i and j indices */ 1152 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1153 *ia = tia; 1154 if (ja) { 1155 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1156 *ja = tja; 1157 } 1158 } else { 1159 nz = a->i[A->rmap->n/bs]; 1160 /* malloc space and add 1 to i and j indices */ 1161 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1162 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1163 if (ja) { 1164 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1165 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1166 } 1167 } 1168 } else { 1169 *ia = tia; 1170 if (ja) *ja = tja; 1171 } 1172 PetscFunctionReturn(0); 1173 } 1174 1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1176 { 1177 PetscErrorCode ierr; 1178 1179 PetscFunctionBegin; 1180 if (!ia) PetscFunctionReturn(0); 1181 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1182 ierr = PetscFree(*ia);CHKERRQ(ierr); 1183 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1184 } 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1189 { 1190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 #if defined(PETSC_USE_LOG) 1195 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1196 #endif 1197 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1198 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1199 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1200 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1202 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1203 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1204 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1205 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1206 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1207 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1208 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1209 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1210 1211 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1212 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1213 ierr = PetscFree(A->data);CHKERRQ(ierr); 1214 1215 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_HYPRE) 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1235 { 1236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 switch (op) { 1241 case MAT_ROW_ORIENTED: 1242 a->roworiented = flg; 1243 break; 1244 case MAT_KEEP_NONZERO_PATTERN: 1245 a->keepnonzeropattern = flg; 1246 break; 1247 case MAT_NEW_NONZERO_LOCATIONS: 1248 a->nonew = (flg ? 0 : 1); 1249 break; 1250 case MAT_NEW_NONZERO_LOCATION_ERR: 1251 a->nonew = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1254 a->nonew = (flg ? -2 : 0); 1255 break; 1256 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1257 a->nounused = (flg ? -1 : 0); 1258 break; 1259 case MAT_NEW_DIAGONALS: 1260 case MAT_IGNORE_OFF_PROC_ENTRIES: 1261 case MAT_USE_HASH_TABLE: 1262 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1263 break; 1264 case MAT_SPD: 1265 case MAT_SYMMETRIC: 1266 case MAT_STRUCTURALLY_SYMMETRIC: 1267 case MAT_HERMITIAN: 1268 case MAT_SYMMETRY_ETERNAL: 1269 case MAT_SUBMAT_SINGLEIS: 1270 case MAT_STRUCTURE_ONLY: 1271 /* These options are handled directly by MatSetOption() */ 1272 break; 1273 default: 1274 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1275 } 1276 PetscFunctionReturn(0); 1277 } 1278 1279 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1280 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1281 { 1282 PetscErrorCode ierr; 1283 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1284 MatScalar *aa_i; 1285 PetscScalar *v_i; 1286 1287 PetscFunctionBegin; 1288 bs = A->rmap->bs; 1289 bs2 = bs*bs; 1290 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1291 1292 bn = row/bs; /* Block number */ 1293 bp = row % bs; /* Block Position */ 1294 M = ai[bn+1] - ai[bn]; 1295 *nz = bs*M; 1296 1297 if (v) { 1298 *v = 0; 1299 if (*nz) { 1300 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1301 for (i=0; i<M; i++) { /* for each block in the block row */ 1302 v_i = *v + i*bs; 1303 aa_i = aa + bs2*(ai[bn] + i); 1304 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1305 } 1306 } 1307 } 1308 1309 if (idx) { 1310 *idx = 0; 1311 if (*nz) { 1312 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1313 for (i=0; i<M; i++) { /* for each block in the block row */ 1314 idx_i = *idx + i*bs; 1315 itmp = bs*aj[ai[bn] + i]; 1316 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1317 } 1318 } 1319 } 1320 PetscFunctionReturn(0); 1321 } 1322 1323 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1324 { 1325 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1326 PetscErrorCode ierr; 1327 1328 PetscFunctionBegin; 1329 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1330 PetscFunctionReturn(0); 1331 } 1332 1333 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1334 { 1335 PetscErrorCode ierr; 1336 1337 PetscFunctionBegin; 1338 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1339 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1344 { 1345 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1346 Mat C; 1347 PetscErrorCode ierr; 1348 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1349 PetscInt *rows,*cols,bs2=a->bs2; 1350 MatScalar *array; 1351 1352 PetscFunctionBegin; 1353 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1354 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1355 1356 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1357 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1358 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1359 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1360 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1361 ierr = PetscFree(col);CHKERRQ(ierr); 1362 } else { 1363 C = *B; 1364 } 1365 1366 array = a->a; 1367 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1368 for (i=0; i<mbs; i++) { 1369 cols[0] = i*bs; 1370 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1371 len = ai[i+1] - ai[i]; 1372 for (j=0; j<len; j++) { 1373 rows[0] = (*aj++)*bs; 1374 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1375 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1376 array += bs2; 1377 } 1378 } 1379 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1380 1381 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1382 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1383 1384 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1385 *B = C; 1386 } else { 1387 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1388 } 1389 PetscFunctionReturn(0); 1390 } 1391 1392 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1393 { 1394 PetscErrorCode ierr; 1395 Mat Btrans; 1396 1397 PetscFunctionBegin; 1398 *f = PETSC_FALSE; 1399 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1400 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1401 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1406 { 1407 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1408 PetscErrorCode ierr; 1409 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1410 int fd; 1411 PetscScalar *aa; 1412 FILE *file; 1413 1414 PetscFunctionBegin; 1415 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1416 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1417 col_lens[0] = MAT_FILE_CLASSID; 1418 1419 col_lens[1] = A->rmap->N; 1420 col_lens[2] = A->cmap->n; 1421 col_lens[3] = a->nz*bs2; 1422 1423 /* store lengths of each row and write (including header) to file */ 1424 count = 0; 1425 for (i=0; i<a->mbs; i++) { 1426 for (j=0; j<bs; j++) { 1427 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1428 } 1429 } 1430 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1431 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1432 1433 /* store column indices (zero start index) */ 1434 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1435 count = 0; 1436 for (i=0; i<a->mbs; i++) { 1437 for (j=0; j<bs; j++) { 1438 for (k=a->i[i]; k<a->i[i+1]; k++) { 1439 for (l=0; l<bs; l++) { 1440 jj[count++] = bs*a->j[k] + l; 1441 } 1442 } 1443 } 1444 } 1445 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1446 ierr = PetscFree(jj);CHKERRQ(ierr); 1447 1448 /* store nonzero values */ 1449 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1450 count = 0; 1451 for (i=0; i<a->mbs; i++) { 1452 for (j=0; j<bs; j++) { 1453 for (k=a->i[i]; k<a->i[i+1]; k++) { 1454 for (l=0; l<bs; l++) { 1455 aa[count++] = a->a[bs2*k + l*bs + j]; 1456 } 1457 } 1458 } 1459 } 1460 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1461 ierr = PetscFree(aa);CHKERRQ(ierr); 1462 1463 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1464 if (file) { 1465 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1466 } 1467 PetscFunctionReturn(0); 1468 } 1469 1470 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1471 { 1472 PetscErrorCode ierr; 1473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1474 PetscInt i,bs = A->rmap->bs,k; 1475 1476 PetscFunctionBegin; 1477 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1478 for (i=0; i<a->mbs; i++) { 1479 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1480 for (k=a->i[i]; k<a->i[i+1]; k++) { 1481 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1482 } 1483 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1484 } 1485 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1490 { 1491 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1492 PetscErrorCode ierr; 1493 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1494 PetscViewerFormat format; 1495 1496 PetscFunctionBegin; 1497 if (A->structure_only) { 1498 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1503 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1504 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1505 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1506 const char *matname; 1507 Mat aij; 1508 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1509 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1510 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1511 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1512 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1513 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1514 PetscFunctionReturn(0); 1515 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1516 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1517 for (i=0; i<a->mbs; i++) { 1518 for (j=0; j<bs; j++) { 1519 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1520 for (k=a->i[i]; k<a->i[i+1]; k++) { 1521 for (l=0; l<bs; l++) { 1522 #if defined(PETSC_USE_COMPLEX) 1523 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1524 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1525 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1526 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1527 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1528 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1529 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1531 } 1532 #else 1533 if (a->a[bs2*k + l*bs + j] != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1535 } 1536 #endif 1537 } 1538 } 1539 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1540 } 1541 } 1542 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1543 } else { 1544 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1545 for (i=0; i<a->mbs; i++) { 1546 for (j=0; j<bs; j++) { 1547 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1548 for (k=a->i[i]; k<a->i[i+1]; k++) { 1549 for (l=0; l<bs; l++) { 1550 #if defined(PETSC_USE_COMPLEX) 1551 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1553 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1554 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1555 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1556 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1557 } else { 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1559 } 1560 #else 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1562 #endif 1563 } 1564 } 1565 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1566 } 1567 } 1568 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1569 } 1570 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 #include <petscdraw.h> 1575 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1576 { 1577 Mat A = (Mat) Aa; 1578 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1579 PetscErrorCode ierr; 1580 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1581 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1582 MatScalar *aa; 1583 PetscViewer viewer; 1584 PetscViewerFormat format; 1585 1586 PetscFunctionBegin; 1587 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1588 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1589 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1590 1591 /* loop over matrix elements drawing boxes */ 1592 1593 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1594 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1595 /* Blue for negative, Cyan for zero and Red for positive */ 1596 color = PETSC_DRAW_BLUE; 1597 for (i=0,row=0; i<mbs; i++,row+=bs) { 1598 for (j=a->i[i]; j<a->i[i+1]; j++) { 1599 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1600 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1601 aa = a->a + j*bs2; 1602 for (k=0; k<bs; k++) { 1603 for (l=0; l<bs; l++) { 1604 if (PetscRealPart(*aa++) >= 0.) continue; 1605 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1606 } 1607 } 1608 } 1609 } 1610 color = PETSC_DRAW_CYAN; 1611 for (i=0,row=0; i<mbs; i++,row+=bs) { 1612 for (j=a->i[i]; j<a->i[i+1]; j++) { 1613 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1614 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1615 aa = a->a + j*bs2; 1616 for (k=0; k<bs; k++) { 1617 for (l=0; l<bs; l++) { 1618 if (PetscRealPart(*aa++) != 0.) continue; 1619 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1620 } 1621 } 1622 } 1623 } 1624 color = PETSC_DRAW_RED; 1625 for (i=0,row=0; i<mbs; i++,row+=bs) { 1626 for (j=a->i[i]; j<a->i[i+1]; j++) { 1627 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1628 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1629 aa = a->a + j*bs2; 1630 for (k=0; k<bs; k++) { 1631 for (l=0; l<bs; l++) { 1632 if (PetscRealPart(*aa++) <= 0.) continue; 1633 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1634 } 1635 } 1636 } 1637 } 1638 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1639 } else { 1640 /* use contour shading to indicate magnitude of values */ 1641 /* first determine max of all nonzero values */ 1642 PetscReal minv = 0.0, maxv = 0.0; 1643 PetscDraw popup; 1644 1645 for (i=0; i<a->nz*a->bs2; i++) { 1646 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1647 } 1648 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1649 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1650 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1651 1652 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1653 for (i=0,row=0; i<mbs; i++,row+=bs) { 1654 for (j=a->i[i]; j<a->i[i+1]; j++) { 1655 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1656 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1657 aa = a->a + j*bs2; 1658 for (k=0; k<bs; k++) { 1659 for (l=0; l<bs; l++) { 1660 MatScalar v = *aa++; 1661 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1662 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1663 } 1664 } 1665 } 1666 } 1667 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1668 } 1669 PetscFunctionReturn(0); 1670 } 1671 1672 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1673 { 1674 PetscErrorCode ierr; 1675 PetscReal xl,yl,xr,yr,w,h; 1676 PetscDraw draw; 1677 PetscBool isnull; 1678 1679 PetscFunctionBegin; 1680 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1681 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1682 if (isnull) PetscFunctionReturn(0); 1683 1684 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1685 xr += w; yr += h; xl = -w; yl = -h; 1686 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1687 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1688 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1689 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1690 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1695 { 1696 PetscErrorCode ierr; 1697 PetscBool iascii,isbinary,isdraw; 1698 1699 PetscFunctionBegin; 1700 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1701 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1702 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1703 if (iascii) { 1704 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1705 } else if (isbinary) { 1706 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1707 } else if (isdraw) { 1708 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1709 } else { 1710 Mat B; 1711 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1712 ierr = MatView(B,viewer);CHKERRQ(ierr); 1713 ierr = MatDestroy(&B);CHKERRQ(ierr); 1714 } 1715 PetscFunctionReturn(0); 1716 } 1717 1718 1719 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1720 { 1721 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1722 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1723 PetscInt *ai = a->i,*ailen = a->ilen; 1724 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1725 MatScalar *ap,*aa = a->a; 1726 1727 PetscFunctionBegin; 1728 for (k=0; k<m; k++) { /* loop over rows */ 1729 row = im[k]; brow = row/bs; 1730 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1731 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1732 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1733 nrow = ailen[brow]; 1734 for (l=0; l<n; l++) { /* loop over columns */ 1735 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1736 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1737 col = in[l]; 1738 bcol = col/bs; 1739 cidx = col%bs; 1740 ridx = row%bs; 1741 high = nrow; 1742 low = 0; /* assume unsorted */ 1743 while (high-low > 5) { 1744 t = (low+high)/2; 1745 if (rp[t] > bcol) high = t; 1746 else low = t; 1747 } 1748 for (i=low; i<high; i++) { 1749 if (rp[i] > bcol) break; 1750 if (rp[i] == bcol) { 1751 *v++ = ap[bs2*i+bs*cidx+ridx]; 1752 goto finished; 1753 } 1754 } 1755 *v++ = 0.0; 1756 finished:; 1757 } 1758 } 1759 PetscFunctionReturn(0); 1760 } 1761 1762 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1763 { 1764 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1765 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1766 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1767 PetscErrorCode ierr; 1768 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1769 PetscBool roworiented=a->roworiented; 1770 const PetscScalar *value = v; 1771 MatScalar *ap=NULL,*aa = a->a,*bap; 1772 1773 PetscFunctionBegin; 1774 if (roworiented) { 1775 stepval = (n-1)*bs; 1776 } else { 1777 stepval = (m-1)*bs; 1778 } 1779 for (k=0; k<m; k++) { /* loop over added rows */ 1780 row = im[k]; 1781 if (row < 0) continue; 1782 #if defined(PETSC_USE_DEBUG) 1783 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1784 #endif 1785 rp = aj + ai[row]; 1786 if (!A->structure_only) ap = aa + bs2*ai[row]; 1787 rmax = imax[row]; 1788 nrow = ailen[row]; 1789 low = 0; 1790 high = nrow; 1791 for (l=0; l<n; l++) { /* loop over added columns */ 1792 if (in[l] < 0) continue; 1793 #if defined(PETSC_USE_DEBUG) 1794 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1795 #endif 1796 col = in[l]; 1797 if (!A->structure_only) { 1798 if (roworiented) { 1799 value = v + (k*(stepval+bs) + l)*bs; 1800 } else { 1801 value = v + (l*(stepval+bs) + k)*bs; 1802 } 1803 } 1804 if (col <= lastcol) low = 0; 1805 else high = nrow; 1806 lastcol = col; 1807 while (high-low > 7) { 1808 t = (low+high)/2; 1809 if (rp[t] > col) high = t; 1810 else low = t; 1811 } 1812 for (i=low; i<high; i++) { 1813 if (rp[i] > col) break; 1814 if (rp[i] == col) { 1815 if (A->structure_only) goto noinsert2; 1816 bap = ap + bs2*i; 1817 if (roworiented) { 1818 if (is == ADD_VALUES) { 1819 for (ii=0; ii<bs; ii++,value+=stepval) { 1820 for (jj=ii; jj<bs2; jj+=bs) { 1821 bap[jj] += *value++; 1822 } 1823 } 1824 } else { 1825 for (ii=0; ii<bs; ii++,value+=stepval) { 1826 for (jj=ii; jj<bs2; jj+=bs) { 1827 bap[jj] = *value++; 1828 } 1829 } 1830 } 1831 } else { 1832 if (is == ADD_VALUES) { 1833 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1834 for (jj=0; jj<bs; jj++) { 1835 bap[jj] += value[jj]; 1836 } 1837 bap += bs; 1838 } 1839 } else { 1840 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1841 for (jj=0; jj<bs; jj++) { 1842 bap[jj] = value[jj]; 1843 } 1844 bap += bs; 1845 } 1846 } 1847 } 1848 goto noinsert2; 1849 } 1850 } 1851 if (nonew == 1) goto noinsert2; 1852 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1853 if (A->structure_only) { 1854 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1855 } else { 1856 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1857 } 1858 N = nrow++ - 1; high++; 1859 /* shift up all the later entries in this row */ 1860 for (ii=N; ii>=i; ii--) { 1861 rp[ii+1] = rp[ii]; 1862 if (!A->structure_only) { 1863 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1864 } 1865 } 1866 if (N >= i && !A->structure_only) { 1867 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1868 } 1869 1870 rp[i] = col; 1871 if (!A->structure_only) { 1872 bap = ap + bs2*i; 1873 if (roworiented) { 1874 for (ii=0; ii<bs; ii++,value+=stepval) { 1875 for (jj=ii; jj<bs2; jj+=bs) { 1876 bap[jj] = *value++; 1877 } 1878 } 1879 } else { 1880 for (ii=0; ii<bs; ii++,value+=stepval) { 1881 for (jj=0; jj<bs; jj++) { 1882 *bap++ = *value++; 1883 } 1884 } 1885 } 1886 } 1887 noinsert2:; 1888 low = i; 1889 } 1890 ailen[row] = nrow; 1891 } 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1896 { 1897 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1898 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1899 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1900 PetscErrorCode ierr; 1901 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1902 MatScalar *aa = a->a,*ap; 1903 PetscReal ratio=0.6; 1904 1905 PetscFunctionBegin; 1906 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1907 1908 if (m) rmax = ailen[0]; 1909 for (i=1; i<mbs; i++) { 1910 /* move each row back by the amount of empty slots (fshift) before it*/ 1911 fshift += imax[i-1] - ailen[i-1]; 1912 rmax = PetscMax(rmax,ailen[i]); 1913 if (fshift) { 1914 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1915 N = ailen[i]; 1916 for (j=0; j<N; j++) { 1917 ip[j-fshift] = ip[j]; 1918 if (!A->structure_only) { 1919 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1920 } 1921 } 1922 } 1923 ai[i] = ai[i-1] + ailen[i-1]; 1924 } 1925 if (mbs) { 1926 fshift += imax[mbs-1] - ailen[mbs-1]; 1927 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1928 } 1929 1930 /* reset ilen and imax for each row */ 1931 a->nonzerorowcnt = 0; 1932 if (A->structure_only) { 1933 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1934 } else { /* !A->structure_only */ 1935 for (i=0; i<mbs; i++) { 1936 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1937 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1938 } 1939 } 1940 a->nz = ai[mbs]; 1941 1942 /* diagonals may have moved, so kill the diagonal pointers */ 1943 a->idiagvalid = PETSC_FALSE; 1944 if (fshift && a->diag) { 1945 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1946 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1947 a->diag = 0; 1948 } 1949 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1950 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1951 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1953 1954 A->info.mallocs += a->reallocs; 1955 a->reallocs = 0; 1956 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1957 a->rmax = rmax; 1958 1959 if (!A->structure_only) { 1960 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1961 } 1962 PetscFunctionReturn(0); 1963 } 1964 1965 /* 1966 This function returns an array of flags which indicate the locations of contiguous 1967 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1968 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1969 Assume: sizes should be long enough to hold all the values. 1970 */ 1971 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1972 { 1973 PetscInt i,j,k,row; 1974 PetscBool flg; 1975 1976 PetscFunctionBegin; 1977 for (i=0,j=0; i<n; j++) { 1978 row = idx[i]; 1979 if (row%bs!=0) { /* Not the begining of a block */ 1980 sizes[j] = 1; 1981 i++; 1982 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1983 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1984 i++; 1985 } else { /* Begining of the block, so check if the complete block exists */ 1986 flg = PETSC_TRUE; 1987 for (k=1; k<bs; k++) { 1988 if (row+k != idx[i+k]) { /* break in the block */ 1989 flg = PETSC_FALSE; 1990 break; 1991 } 1992 } 1993 if (flg) { /* No break in the bs */ 1994 sizes[j] = bs; 1995 i += bs; 1996 } else { 1997 sizes[j] = 1; 1998 i++; 1999 } 2000 } 2001 } 2002 *bs_max = j; 2003 PetscFunctionReturn(0); 2004 } 2005 2006 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2007 { 2008 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2009 PetscErrorCode ierr; 2010 PetscInt i,j,k,count,*rows; 2011 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2012 PetscScalar zero = 0.0; 2013 MatScalar *aa; 2014 const PetscScalar *xx; 2015 PetscScalar *bb; 2016 2017 PetscFunctionBegin; 2018 /* fix right hand side if needed */ 2019 if (x && b) { 2020 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2021 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2022 for (i=0; i<is_n; i++) { 2023 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2024 } 2025 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2026 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2027 } 2028 2029 /* Make a copy of the IS and sort it */ 2030 /* allocate memory for rows,sizes */ 2031 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2032 2033 /* copy IS values to rows, and sort them */ 2034 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2035 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2036 2037 if (baij->keepnonzeropattern) { 2038 for (i=0; i<is_n; i++) sizes[i] = 1; 2039 bs_max = is_n; 2040 } else { 2041 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2042 A->nonzerostate++; 2043 } 2044 2045 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2046 row = rows[j]; 2047 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2048 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2049 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2050 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2051 if (diag != (PetscScalar)0.0) { 2052 if (baij->ilen[row/bs] > 0) { 2053 baij->ilen[row/bs] = 1; 2054 baij->j[baij->i[row/bs]] = row/bs; 2055 2056 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2057 } 2058 /* Now insert all the diagonal values for this bs */ 2059 for (k=0; k<bs; k++) { 2060 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2061 } 2062 } else { /* (diag == 0.0) */ 2063 baij->ilen[row/bs] = 0; 2064 } /* end (diag == 0.0) */ 2065 } else { /* (sizes[i] != bs) */ 2066 #if defined(PETSC_USE_DEBUG) 2067 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2068 #endif 2069 for (k=0; k<count; k++) { 2070 aa[0] = zero; 2071 aa += bs; 2072 } 2073 if (diag != (PetscScalar)0.0) { 2074 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2075 } 2076 } 2077 } 2078 2079 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2080 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2085 { 2086 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2087 PetscErrorCode ierr; 2088 PetscInt i,j,k,count; 2089 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2090 PetscScalar zero = 0.0; 2091 MatScalar *aa; 2092 const PetscScalar *xx; 2093 PetscScalar *bb; 2094 PetscBool *zeroed,vecs = PETSC_FALSE; 2095 2096 PetscFunctionBegin; 2097 /* fix right hand side if needed */ 2098 if (x && b) { 2099 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2100 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2101 vecs = PETSC_TRUE; 2102 } 2103 2104 /* zero the columns */ 2105 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2106 for (i=0; i<is_n; i++) { 2107 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2108 zeroed[is_idx[i]] = PETSC_TRUE; 2109 } 2110 for (i=0; i<A->rmap->N; i++) { 2111 if (!zeroed[i]) { 2112 row = i/bs; 2113 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2114 for (k=0; k<bs; k++) { 2115 col = bs*baij->j[j] + k; 2116 if (zeroed[col]) { 2117 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2118 if (vecs) bb[i] -= aa[0]*xx[col]; 2119 aa[0] = 0.0; 2120 } 2121 } 2122 } 2123 } else if (vecs) bb[i] = diag*xx[i]; 2124 } 2125 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2126 if (vecs) { 2127 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2128 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2129 } 2130 2131 /* zero the rows */ 2132 for (i=0; i<is_n; i++) { 2133 row = is_idx[i]; 2134 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2135 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2136 for (k=0; k<count; k++) { 2137 aa[0] = zero; 2138 aa += bs; 2139 } 2140 if (diag != (PetscScalar)0.0) { 2141 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2142 } 2143 } 2144 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2145 PetscFunctionReturn(0); 2146 } 2147 2148 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2149 { 2150 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2151 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2152 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2153 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2154 PetscErrorCode ierr; 2155 PetscInt ridx,cidx,bs2=a->bs2; 2156 PetscBool roworiented=a->roworiented; 2157 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2158 2159 PetscFunctionBegin; 2160 for (k=0; k<m; k++) { /* loop over added rows */ 2161 row = im[k]; 2162 brow = row/bs; 2163 if (row < 0) continue; 2164 #if defined(PETSC_USE_DEBUG) 2165 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2166 #endif 2167 rp = aj + ai[brow]; 2168 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2169 rmax = imax[brow]; 2170 nrow = ailen[brow]; 2171 low = 0; 2172 high = nrow; 2173 for (l=0; l<n; l++) { /* loop over added columns */ 2174 if (in[l] < 0) continue; 2175 #if defined(PETSC_USE_DEBUG) 2176 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2177 #endif 2178 col = in[l]; bcol = col/bs; 2179 ridx = row % bs; cidx = col % bs; 2180 if (!A->structure_only) { 2181 if (roworiented) { 2182 value = v[l + k*n]; 2183 } else { 2184 value = v[k + l*m]; 2185 } 2186 } 2187 if (col <= lastcol) low = 0; else high = nrow; 2188 lastcol = col; 2189 while (high-low > 7) { 2190 t = (low+high)/2; 2191 if (rp[t] > bcol) high = t; 2192 else low = t; 2193 } 2194 for (i=low; i<high; i++) { 2195 if (rp[i] > bcol) break; 2196 if (rp[i] == bcol) { 2197 bap = ap + bs2*i + bs*cidx + ridx; 2198 if (!A->structure_only) { 2199 if (is == ADD_VALUES) *bap += value; 2200 else *bap = value; 2201 } 2202 goto noinsert1; 2203 } 2204 } 2205 if (nonew == 1) goto noinsert1; 2206 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2207 if (A->structure_only) { 2208 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2209 } else { 2210 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2211 } 2212 N = nrow++ - 1; high++; 2213 /* shift up all the later entries in this row */ 2214 for (ii=N; ii>=i; ii--) { 2215 rp[ii+1] = rp[ii]; 2216 if (!A->structure_only) { 2217 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2218 } 2219 } 2220 if (N>=i && !A->structure_only) { 2221 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2222 } 2223 rp[i] = bcol; 2224 if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value; 2225 a->nz++; 2226 A->nonzerostate++; 2227 noinsert1:; 2228 low = i; 2229 } 2230 ailen[brow] = nrow; 2231 } 2232 PetscFunctionReturn(0); 2233 } 2234 2235 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2236 { 2237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2238 Mat outA; 2239 PetscErrorCode ierr; 2240 PetscBool row_identity,col_identity; 2241 2242 PetscFunctionBegin; 2243 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2244 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2245 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2246 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2247 2248 outA = inA; 2249 inA->factortype = MAT_FACTOR_LU; 2250 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2251 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2252 2253 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2254 2255 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2256 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2257 a->row = row; 2258 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2259 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2260 a->col = col; 2261 2262 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2263 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2264 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2265 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2266 2267 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2268 if (!a->solve_work) { 2269 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2270 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2271 } 2272 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2273 PetscFunctionReturn(0); 2274 } 2275 2276 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2277 { 2278 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2279 PetscInt i,nz,mbs; 2280 2281 PetscFunctionBegin; 2282 nz = baij->maxnz; 2283 mbs = baij->mbs; 2284 for (i=0; i<nz; i++) { 2285 baij->j[i] = indices[i]; 2286 } 2287 baij->nz = nz; 2288 for (i=0; i<mbs; i++) { 2289 baij->ilen[i] = baij->imax[i]; 2290 } 2291 PetscFunctionReturn(0); 2292 } 2293 2294 /*@ 2295 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2296 in the matrix. 2297 2298 Input Parameters: 2299 + mat - the SeqBAIJ matrix 2300 - indices - the column indices 2301 2302 Level: advanced 2303 2304 Notes: 2305 This can be called if you have precomputed the nonzero structure of the 2306 matrix and want to provide it to the matrix object to improve the performance 2307 of the MatSetValues() operation. 2308 2309 You MUST have set the correct numbers of nonzeros per row in the call to 2310 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2311 2312 MUST be called before any calls to MatSetValues(); 2313 2314 @*/ 2315 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2316 { 2317 PetscErrorCode ierr; 2318 2319 PetscFunctionBegin; 2320 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2321 PetscValidPointer(indices,2); 2322 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2323 PetscFunctionReturn(0); 2324 } 2325 2326 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2327 { 2328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2329 PetscErrorCode ierr; 2330 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2331 PetscReal atmp; 2332 PetscScalar *x,zero = 0.0; 2333 MatScalar *aa; 2334 PetscInt ncols,brow,krow,kcol; 2335 2336 PetscFunctionBegin; 2337 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2338 bs = A->rmap->bs; 2339 aa = a->a; 2340 ai = a->i; 2341 aj = a->j; 2342 mbs = a->mbs; 2343 2344 ierr = VecSet(v,zero);CHKERRQ(ierr); 2345 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2346 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2347 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2348 for (i=0; i<mbs; i++) { 2349 ncols = ai[1] - ai[0]; ai++; 2350 brow = bs*i; 2351 for (j=0; j<ncols; j++) { 2352 for (kcol=0; kcol<bs; kcol++) { 2353 for (krow=0; krow<bs; krow++) { 2354 atmp = PetscAbsScalar(*aa);aa++; 2355 row = brow + krow; /* row index */ 2356 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2357 } 2358 } 2359 aj++; 2360 } 2361 } 2362 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2363 PetscFunctionReturn(0); 2364 } 2365 2366 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2367 { 2368 PetscErrorCode ierr; 2369 2370 PetscFunctionBegin; 2371 /* If the two matrices have the same copy implementation, use fast copy. */ 2372 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2373 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2374 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2375 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2376 2377 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2378 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2379 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2380 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2381 } else { 2382 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2383 } 2384 PetscFunctionReturn(0); 2385 } 2386 2387 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2388 { 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2393 PetscFunctionReturn(0); 2394 } 2395 2396 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2397 { 2398 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2399 2400 PetscFunctionBegin; 2401 *array = a->a; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2406 { 2407 PetscFunctionBegin; 2408 PetscFunctionReturn(0); 2409 } 2410 2411 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2412 { 2413 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2414 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2415 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2416 PetscErrorCode ierr; 2417 2418 PetscFunctionBegin; 2419 /* Set the number of nonzeros in the new matrix */ 2420 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2421 PetscFunctionReturn(0); 2422 } 2423 2424 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2425 { 2426 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2427 PetscErrorCode ierr; 2428 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2429 PetscBLASInt one=1; 2430 2431 PetscFunctionBegin; 2432 if (str == SAME_NONZERO_PATTERN) { 2433 PetscScalar alpha = a; 2434 PetscBLASInt bnz; 2435 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2436 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2437 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2438 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2439 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2440 } else { 2441 Mat B; 2442 PetscInt *nnz; 2443 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2444 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2445 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2446 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2447 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2448 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2449 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2450 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2451 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2452 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2453 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2454 ierr = PetscFree(nnz);CHKERRQ(ierr); 2455 } 2456 PetscFunctionReturn(0); 2457 } 2458 2459 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2460 { 2461 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2462 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2463 MatScalar *aa = a->a; 2464 2465 PetscFunctionBegin; 2466 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2471 { 2472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2473 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2474 MatScalar *aa = a->a; 2475 2476 PetscFunctionBegin; 2477 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 /* 2482 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2483 */ 2484 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2485 { 2486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2487 PetscErrorCode ierr; 2488 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2489 PetscInt nz = a->i[m],row,*jj,mr,col; 2490 2491 PetscFunctionBegin; 2492 *nn = n; 2493 if (!ia) PetscFunctionReturn(0); 2494 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2495 else { 2496 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2497 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2498 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2499 jj = a->j; 2500 for (i=0; i<nz; i++) { 2501 collengths[jj[i]]++; 2502 } 2503 cia[0] = oshift; 2504 for (i=0; i<n; i++) { 2505 cia[i+1] = cia[i] + collengths[i]; 2506 } 2507 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2508 jj = a->j; 2509 for (row=0; row<m; row++) { 2510 mr = a->i[row+1] - a->i[row]; 2511 for (i=0; i<mr; i++) { 2512 col = *jj++; 2513 2514 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2515 } 2516 } 2517 ierr = PetscFree(collengths);CHKERRQ(ierr); 2518 *ia = cia; *ja = cja; 2519 } 2520 PetscFunctionReturn(0); 2521 } 2522 2523 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2524 { 2525 PetscErrorCode ierr; 2526 2527 PetscFunctionBegin; 2528 if (!ia) PetscFunctionReturn(0); 2529 ierr = PetscFree(*ia);CHKERRQ(ierr); 2530 ierr = PetscFree(*ja);CHKERRQ(ierr); 2531 PetscFunctionReturn(0); 2532 } 2533 2534 /* 2535 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2536 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2537 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2538 */ 2539 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2540 { 2541 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2542 PetscErrorCode ierr; 2543 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2544 PetscInt nz = a->i[m],row,*jj,mr,col; 2545 PetscInt *cspidx; 2546 2547 PetscFunctionBegin; 2548 *nn = n; 2549 if (!ia) PetscFunctionReturn(0); 2550 2551 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2552 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2553 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2554 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2555 jj = a->j; 2556 for (i=0; i<nz; i++) { 2557 collengths[jj[i]]++; 2558 } 2559 cia[0] = oshift; 2560 for (i=0; i<n; i++) { 2561 cia[i+1] = cia[i] + collengths[i]; 2562 } 2563 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2564 jj = a->j; 2565 for (row=0; row<m; row++) { 2566 mr = a->i[row+1] - a->i[row]; 2567 for (i=0; i<mr; i++) { 2568 col = *jj++; 2569 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2570 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2571 } 2572 } 2573 ierr = PetscFree(collengths);CHKERRQ(ierr); 2574 *ia = cia; *ja = cja; 2575 *spidx = cspidx; 2576 PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2580 { 2581 PetscErrorCode ierr; 2582 2583 PetscFunctionBegin; 2584 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2585 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2590 { 2591 PetscErrorCode ierr; 2592 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2593 2594 PetscFunctionBegin; 2595 if (!Y->preallocated || !aij->nz) { 2596 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2597 } 2598 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2599 PetscFunctionReturn(0); 2600 } 2601 2602 /* -------------------------------------------------------------------*/ 2603 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2604 MatGetRow_SeqBAIJ, 2605 MatRestoreRow_SeqBAIJ, 2606 MatMult_SeqBAIJ_N, 2607 /* 4*/ MatMultAdd_SeqBAIJ_N, 2608 MatMultTranspose_SeqBAIJ, 2609 MatMultTransposeAdd_SeqBAIJ, 2610 0, 2611 0, 2612 0, 2613 /* 10*/ 0, 2614 MatLUFactor_SeqBAIJ, 2615 0, 2616 0, 2617 MatTranspose_SeqBAIJ, 2618 /* 15*/ MatGetInfo_SeqBAIJ, 2619 MatEqual_SeqBAIJ, 2620 MatGetDiagonal_SeqBAIJ, 2621 MatDiagonalScale_SeqBAIJ, 2622 MatNorm_SeqBAIJ, 2623 /* 20*/ 0, 2624 MatAssemblyEnd_SeqBAIJ, 2625 MatSetOption_SeqBAIJ, 2626 MatZeroEntries_SeqBAIJ, 2627 /* 24*/ MatZeroRows_SeqBAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /* 29*/ MatSetUp_SeqBAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /* 34*/ MatDuplicate_SeqBAIJ, 2638 0, 2639 0, 2640 MatILUFactor_SeqBAIJ, 2641 0, 2642 /* 39*/ MatAXPY_SeqBAIJ, 2643 MatCreateSubMatrices_SeqBAIJ, 2644 MatIncreaseOverlap_SeqBAIJ, 2645 MatGetValues_SeqBAIJ, 2646 MatCopy_SeqBAIJ, 2647 /* 44*/ 0, 2648 MatScale_SeqBAIJ, 2649 MatShift_SeqBAIJ, 2650 0, 2651 MatZeroRowsColumns_SeqBAIJ, 2652 /* 49*/ 0, 2653 MatGetRowIJ_SeqBAIJ, 2654 MatRestoreRowIJ_SeqBAIJ, 2655 MatGetColumnIJ_SeqBAIJ, 2656 MatRestoreColumnIJ_SeqBAIJ, 2657 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2658 0, 2659 0, 2660 0, 2661 MatSetValuesBlocked_SeqBAIJ, 2662 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2663 MatDestroy_SeqBAIJ, 2664 MatView_SeqBAIJ, 2665 0, 2666 0, 2667 /* 64*/ 0, 2668 0, 2669 0, 2670 0, 2671 0, 2672 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2673 0, 2674 MatConvert_Basic, 2675 0, 2676 0, 2677 /* 74*/ 0, 2678 MatFDColoringApply_BAIJ, 2679 0, 2680 0, 2681 0, 2682 /* 79*/ 0, 2683 0, 2684 0, 2685 0, 2686 MatLoad_SeqBAIJ, 2687 /* 84*/ 0, 2688 0, 2689 0, 2690 0, 2691 0, 2692 /* 89*/ 0, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /* 94*/ 0, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /* 99*/ 0, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /*104*/ 0, 2708 MatRealPart_SeqBAIJ, 2709 MatImaginaryPart_SeqBAIJ, 2710 0, 2711 0, 2712 /*109*/ 0, 2713 0, 2714 0, 2715 0, 2716 MatMissingDiagonal_SeqBAIJ, 2717 /*114*/ 0, 2718 0, 2719 0, 2720 0, 2721 0, 2722 /*119*/ 0, 2723 0, 2724 MatMultHermitianTranspose_SeqBAIJ, 2725 MatMultHermitianTransposeAdd_SeqBAIJ, 2726 0, 2727 /*124*/ 0, 2728 0, 2729 MatInvertBlockDiagonal_SeqBAIJ, 2730 0, 2731 0, 2732 /*129*/ 0, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /*134*/ 0, 2738 0, 2739 0, 2740 0, 2741 0, 2742 /*139*/ MatSetBlockSizes_Default, 2743 0, 2744 0, 2745 MatFDColoringSetUp_SeqXAIJ, 2746 0, 2747 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2748 MatDestroySubMatrices_SeqBAIJ 2749 }; 2750 2751 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2752 { 2753 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2754 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2759 2760 /* allocate space for values if not already there */ 2761 if (!aij->saved_values) { 2762 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2763 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2764 } 2765 2766 /* copy values over */ 2767 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2768 PetscFunctionReturn(0); 2769 } 2770 2771 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2772 { 2773 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2774 PetscErrorCode ierr; 2775 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2776 2777 PetscFunctionBegin; 2778 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2779 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2780 2781 /* copy values over */ 2782 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2787 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2788 2789 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2790 { 2791 Mat_SeqBAIJ *b; 2792 PetscErrorCode ierr; 2793 PetscInt i,mbs,nbs,bs2; 2794 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2795 2796 PetscFunctionBegin; 2797 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2798 if (nz == MAT_SKIP_ALLOCATION) { 2799 skipallocation = PETSC_TRUE; 2800 nz = 0; 2801 } 2802 2803 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2804 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2805 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2806 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2807 2808 B->preallocated = PETSC_TRUE; 2809 2810 mbs = B->rmap->n/bs; 2811 nbs = B->cmap->n/bs; 2812 bs2 = bs*bs; 2813 2814 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2815 2816 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2817 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2818 if (nnz) { 2819 for (i=0; i<mbs; i++) { 2820 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2821 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2822 } 2823 } 2824 2825 b = (Mat_SeqBAIJ*)B->data; 2826 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2827 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2828 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2829 2830 if (!flg) { 2831 switch (bs) { 2832 case 1: 2833 B->ops->mult = MatMult_SeqBAIJ_1; 2834 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2835 break; 2836 case 2: 2837 B->ops->mult = MatMult_SeqBAIJ_2; 2838 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2839 break; 2840 case 3: 2841 B->ops->mult = MatMult_SeqBAIJ_3; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2843 break; 2844 case 4: 2845 B->ops->mult = MatMult_SeqBAIJ_4; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2847 break; 2848 case 5: 2849 B->ops->mult = MatMult_SeqBAIJ_5; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2851 break; 2852 case 6: 2853 B->ops->mult = MatMult_SeqBAIJ_6; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2855 break; 2856 case 7: 2857 B->ops->mult = MatMult_SeqBAIJ_7; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2859 break; 2860 case 9: 2861 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2862 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2863 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2864 #else 2865 B->ops->mult = MatMult_SeqBAIJ_N; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2867 #endif 2868 break; 2869 case 11: 2870 B->ops->mult = MatMult_SeqBAIJ_11; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2872 break; 2873 case 15: 2874 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2876 break; 2877 default: 2878 B->ops->mult = MatMult_SeqBAIJ_N; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2880 break; 2881 } 2882 } 2883 B->ops->sor = MatSOR_SeqBAIJ; 2884 b->mbs = mbs; 2885 b->nbs = nbs; 2886 if (!skipallocation) { 2887 if (!b->imax) { 2888 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2889 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2890 2891 b->free_imax_ilen = PETSC_TRUE; 2892 } 2893 /* b->ilen will count nonzeros in each block row so far. */ 2894 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2895 if (!nnz) { 2896 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2897 else if (nz < 0) nz = 1; 2898 nz = PetscMin(nz,nbs); 2899 for (i=0; i<mbs; i++) b->imax[i] = nz; 2900 nz = nz*mbs; 2901 } else { 2902 nz = 0; 2903 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2904 } 2905 2906 /* allocate the matrix space */ 2907 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2908 if (B->structure_only) { 2909 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2910 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2911 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2912 } else { 2913 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2914 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2915 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2916 } 2917 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2918 2919 if (B->structure_only) { 2920 b->singlemalloc = PETSC_FALSE; 2921 b->free_a = PETSC_FALSE; 2922 } else { 2923 b->singlemalloc = PETSC_TRUE; 2924 b->free_a = PETSC_TRUE; 2925 } 2926 b->free_ij = PETSC_TRUE; 2927 2928 b->i[0] = 0; 2929 for (i=1; i<mbs+1; i++) { 2930 b->i[i] = b->i[i-1] + b->imax[i-1]; 2931 } 2932 2933 } else { 2934 b->free_a = PETSC_FALSE; 2935 b->free_ij = PETSC_FALSE; 2936 } 2937 2938 b->bs2 = bs2; 2939 b->mbs = mbs; 2940 b->nz = 0; 2941 b->maxnz = nz; 2942 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2943 B->was_assembled = PETSC_FALSE; 2944 B->assembled = PETSC_FALSE; 2945 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2946 PetscFunctionReturn(0); 2947 } 2948 2949 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2950 { 2951 PetscInt i,m,nz,nz_max=0,*nnz; 2952 PetscScalar *values=0; 2953 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2954 PetscErrorCode ierr; 2955 2956 PetscFunctionBegin; 2957 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2958 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2960 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2961 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2962 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2963 m = B->rmap->n/bs; 2964 2965 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2966 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2967 for (i=0; i<m; i++) { 2968 nz = ii[i+1]- ii[i]; 2969 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2970 nz_max = PetscMax(nz_max, nz); 2971 nnz[i] = nz; 2972 } 2973 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2974 ierr = PetscFree(nnz);CHKERRQ(ierr); 2975 2976 values = (PetscScalar*)V; 2977 if (!values) { 2978 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2979 } 2980 for (i=0; i<m; i++) { 2981 PetscInt ncols = ii[i+1] - ii[i]; 2982 const PetscInt *icols = jj + ii[i]; 2983 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2984 if (!roworiented) { 2985 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2986 } else { 2987 PetscInt j; 2988 for (j=0; j<ncols; j++) { 2989 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2990 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2991 } 2992 } 2993 } 2994 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2995 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2996 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2997 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2998 PetscFunctionReturn(0); 2999 } 3000 3001 /*MC 3002 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3003 block sparse compressed row format. 3004 3005 Options Database Keys: 3006 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3007 3008 Level: beginner 3009 3010 .seealso: MatCreateSeqBAIJ() 3011 M*/ 3012 3013 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3014 3015 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3016 { 3017 PetscErrorCode ierr; 3018 PetscMPIInt size; 3019 Mat_SeqBAIJ *b; 3020 3021 PetscFunctionBegin; 3022 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3023 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3024 3025 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3026 B->data = (void*)b; 3027 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3028 3029 b->row = 0; 3030 b->col = 0; 3031 b->icol = 0; 3032 b->reallocs = 0; 3033 b->saved_values = 0; 3034 3035 b->roworiented = PETSC_TRUE; 3036 b->nonew = 0; 3037 b->diag = 0; 3038 B->spptr = 0; 3039 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3040 b->keepnonzeropattern = PETSC_FALSE; 3041 3042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3044 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3046 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3049 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3051 #if defined(PETSC_HAVE_HYPRE) 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3053 #endif 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3057 PetscFunctionReturn(0); 3058 } 3059 3060 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3061 { 3062 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3063 PetscErrorCode ierr; 3064 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3065 3066 PetscFunctionBegin; 3067 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3068 3069 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3070 c->imax = a->imax; 3071 c->ilen = a->ilen; 3072 c->free_imax_ilen = PETSC_FALSE; 3073 } else { 3074 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3075 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3076 for (i=0; i<mbs; i++) { 3077 c->imax[i] = a->imax[i]; 3078 c->ilen[i] = a->ilen[i]; 3079 } 3080 c->free_imax_ilen = PETSC_TRUE; 3081 } 3082 3083 /* allocate the matrix space */ 3084 if (mallocmatspace) { 3085 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3086 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3087 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3088 3089 c->i = a->i; 3090 c->j = a->j; 3091 c->singlemalloc = PETSC_FALSE; 3092 c->free_a = PETSC_TRUE; 3093 c->free_ij = PETSC_FALSE; 3094 c->parent = A; 3095 C->preallocated = PETSC_TRUE; 3096 C->assembled = PETSC_TRUE; 3097 3098 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3099 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3100 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3101 } else { 3102 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3103 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3104 3105 c->singlemalloc = PETSC_TRUE; 3106 c->free_a = PETSC_TRUE; 3107 c->free_ij = PETSC_TRUE; 3108 3109 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3110 if (mbs > 0) { 3111 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3112 if (cpvalues == MAT_COPY_VALUES) { 3113 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3114 } else { 3115 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3116 } 3117 } 3118 C->preallocated = PETSC_TRUE; 3119 C->assembled = PETSC_TRUE; 3120 } 3121 } 3122 3123 c->roworiented = a->roworiented; 3124 c->nonew = a->nonew; 3125 3126 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3127 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3128 3129 c->bs2 = a->bs2; 3130 c->mbs = a->mbs; 3131 c->nbs = a->nbs; 3132 3133 if (a->diag) { 3134 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3135 c->diag = a->diag; 3136 c->free_diag = PETSC_FALSE; 3137 } else { 3138 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3139 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3140 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3141 c->free_diag = PETSC_TRUE; 3142 } 3143 } else c->diag = 0; 3144 3145 c->nz = a->nz; 3146 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3147 c->solve_work = NULL; 3148 c->mult_work = NULL; 3149 c->sor_workt = NULL; 3150 c->sor_work = NULL; 3151 3152 c->compressedrow.use = a->compressedrow.use; 3153 c->compressedrow.nrows = a->compressedrow.nrows; 3154 if (a->compressedrow.use) { 3155 i = a->compressedrow.nrows; 3156 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3157 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3158 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3159 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3160 } else { 3161 c->compressedrow.use = PETSC_FALSE; 3162 c->compressedrow.i = NULL; 3163 c->compressedrow.rindex = NULL; 3164 } 3165 C->nonzerostate = A->nonzerostate; 3166 3167 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3172 { 3173 PetscErrorCode ierr; 3174 3175 PetscFunctionBegin; 3176 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3177 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3178 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3179 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3180 PetscFunctionReturn(0); 3181 } 3182 3183 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3184 { 3185 Mat_SeqBAIJ *a; 3186 PetscErrorCode ierr; 3187 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3188 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3189 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3190 PetscInt *masked,nmask,tmp,bs2,ishift; 3191 PetscMPIInt size; 3192 int fd; 3193 PetscScalar *aa; 3194 MPI_Comm comm; 3195 PetscBool isbinary; 3196 3197 PetscFunctionBegin; 3198 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3199 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name); 3200 3201 /* force binary viewer to load .info file if it has not yet done so */ 3202 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3203 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3204 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3205 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3206 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3207 if (bs < 0) bs = 1; 3208 bs2 = bs*bs; 3209 3210 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3211 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3212 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3213 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3214 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3215 M = header[1]; N = header[2]; nz = header[3]; 3216 3217 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3218 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3219 3220 /* 3221 This code adds extra rows to make sure the number of rows is 3222 divisible by the blocksize 3223 */ 3224 mbs = M/bs; 3225 extra_rows = bs - M + bs*(mbs); 3226 if (extra_rows == bs) extra_rows = 0; 3227 else mbs++; 3228 if (extra_rows) { 3229 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3230 } 3231 3232 /* Set global sizes if not already set */ 3233 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3234 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3235 } else { /* Check if the matrix global sizes are correct */ 3236 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3237 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3238 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3239 } 3240 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3241 } 3242 3243 /* read in row lengths */ 3244 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3245 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3246 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3247 3248 /* read in column indices */ 3249 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3250 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3251 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3252 3253 /* loop over row lengths determining block row lengths */ 3254 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3255 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3256 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3257 rowcount = 0; 3258 nzcount = 0; 3259 for (i=0; i<mbs; i++) { 3260 nmask = 0; 3261 for (j=0; j<bs; j++) { 3262 kmax = rowlengths[rowcount]; 3263 for (k=0; k<kmax; k++) { 3264 tmp = jj[nzcount++]/bs; 3265 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3266 } 3267 rowcount++; 3268 } 3269 browlengths[i] += nmask; 3270 /* zero out the mask elements we set */ 3271 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3272 } 3273 3274 /* Do preallocation */ 3275 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3276 a = (Mat_SeqBAIJ*)newmat->data; 3277 3278 /* set matrix "i" values */ 3279 a->i[0] = 0; 3280 for (i=1; i<= mbs; i++) { 3281 a->i[i] = a->i[i-1] + browlengths[i-1]; 3282 a->ilen[i-1] = browlengths[i-1]; 3283 } 3284 a->nz = 0; 3285 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3286 3287 /* read in nonzero values */ 3288 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3289 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3290 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3291 3292 /* set "a" and "j" values into matrix */ 3293 nzcount = 0; jcount = 0; 3294 for (i=0; i<mbs; i++) { 3295 nzcountb = nzcount; 3296 nmask = 0; 3297 for (j=0; j<bs; j++) { 3298 kmax = rowlengths[i*bs+j]; 3299 for (k=0; k<kmax; k++) { 3300 tmp = jj[nzcount++]/bs; 3301 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3302 } 3303 } 3304 /* sort the masked values */ 3305 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3306 3307 /* set "j" values into matrix */ 3308 maskcount = 1; 3309 for (j=0; j<nmask; j++) { 3310 a->j[jcount++] = masked[j]; 3311 mask[masked[j]] = maskcount++; 3312 } 3313 /* set "a" values into matrix */ 3314 ishift = bs2*a->i[i]; 3315 for (j=0; j<bs; j++) { 3316 kmax = rowlengths[i*bs+j]; 3317 for (k=0; k<kmax; k++) { 3318 tmp = jj[nzcountb]/bs; 3319 block = mask[tmp] - 1; 3320 point = jj[nzcountb] - bs*tmp; 3321 idx = ishift + bs2*block + j + bs*point; 3322 a->a[idx] = (MatScalar)aa[nzcountb++]; 3323 } 3324 } 3325 /* zero out the mask elements we set */ 3326 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3327 } 3328 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3329 3330 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3331 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3332 ierr = PetscFree(aa);CHKERRQ(ierr); 3333 ierr = PetscFree(jj);CHKERRQ(ierr); 3334 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3335 3336 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3337 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3338 PetscFunctionReturn(0); 3339 } 3340 3341 /*@C 3342 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3343 compressed row) format. For good matrix assembly performance the 3344 user should preallocate the matrix storage by setting the parameter nz 3345 (or the array nnz). By setting these parameters accurately, performance 3346 during matrix assembly can be increased by more than a factor of 50. 3347 3348 Collective on MPI_Comm 3349 3350 Input Parameters: 3351 + comm - MPI communicator, set to PETSC_COMM_SELF 3352 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3353 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3354 . m - number of rows 3355 . n - number of columns 3356 . nz - number of nonzero blocks per block row (same for all rows) 3357 - nnz - array containing the number of nonzero blocks in the various block rows 3358 (possibly different for each block row) or NULL 3359 3360 Output Parameter: 3361 . A - the matrix 3362 3363 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3364 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3365 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3366 3367 Options Database Keys: 3368 . -mat_no_unroll - uses code that does not unroll the loops in the 3369 block calculations (much slower) 3370 . -mat_block_size - size of the blocks to use 3371 3372 Level: intermediate 3373 3374 Notes: 3375 The number of rows and columns must be divisible by blocksize. 3376 3377 If the nnz parameter is given then the nz parameter is ignored 3378 3379 A nonzero block is any block that as 1 or more nonzeros in it 3380 3381 The block AIJ format is fully compatible with standard Fortran 77 3382 storage. That is, the stored row and column indices can begin at 3383 either one (as in Fortran) or zero. See the users' manual for details. 3384 3385 Specify the preallocated storage with either nz or nnz (not both). 3386 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3387 allocation. See Users-Manual: ch_mat for details. 3388 matrices. 3389 3390 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3391 @*/ 3392 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3393 { 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3398 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3399 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3400 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3401 PetscFunctionReturn(0); 3402 } 3403 3404 /*@C 3405 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3406 per row in the matrix. For good matrix assembly performance the 3407 user should preallocate the matrix storage by setting the parameter nz 3408 (or the array nnz). By setting these parameters accurately, performance 3409 during matrix assembly can be increased by more than a factor of 50. 3410 3411 Collective on MPI_Comm 3412 3413 Input Parameters: 3414 + B - the matrix 3415 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3416 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3417 . nz - number of block nonzeros per block row (same for all rows) 3418 - nnz - array containing the number of block nonzeros in the various block rows 3419 (possibly different for each block row) or NULL 3420 3421 Options Database Keys: 3422 . -mat_no_unroll - uses code that does not unroll the loops in the 3423 block calculations (much slower) 3424 . -mat_block_size - size of the blocks to use 3425 3426 Level: intermediate 3427 3428 Notes: 3429 If the nnz parameter is given then the nz parameter is ignored 3430 3431 You can call MatGetInfo() to get information on how effective the preallocation was; 3432 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3433 You can also run with the option -info and look for messages with the string 3434 malloc in them to see if additional memory allocation was needed. 3435 3436 The block AIJ format is fully compatible with standard Fortran 77 3437 storage. That is, the stored row and column indices can begin at 3438 either one (as in Fortran) or zero. See the users' manual for details. 3439 3440 Specify the preallocated storage with either nz or nnz (not both). 3441 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3442 allocation. See Users-Manual: ch_mat for details. 3443 3444 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3445 @*/ 3446 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3447 { 3448 PetscErrorCode ierr; 3449 3450 PetscFunctionBegin; 3451 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3452 PetscValidType(B,1); 3453 PetscValidLogicalCollectiveInt(B,bs,2); 3454 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3455 PetscFunctionReturn(0); 3456 } 3457 3458 /*@C 3459 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3460 (the default sequential PETSc format). 3461 3462 Collective on MPI_Comm 3463 3464 Input Parameters: 3465 + B - the matrix 3466 . i - the indices into j for the start of each local row (starts with zero) 3467 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3468 - v - optional values in the matrix 3469 3470 Level: developer 3471 3472 Notes: 3473 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3474 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3475 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3476 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3477 block column and the second index is over columns within a block. 3478 3479 .keywords: matrix, aij, compressed row, sparse 3480 3481 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3482 @*/ 3483 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3484 { 3485 PetscErrorCode ierr; 3486 3487 PetscFunctionBegin; 3488 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3489 PetscValidType(B,1); 3490 PetscValidLogicalCollectiveInt(B,bs,2); 3491 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3492 PetscFunctionReturn(0); 3493 } 3494 3495 3496 /*@ 3497 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3498 3499 Collective on MPI_Comm 3500 3501 Input Parameters: 3502 + comm - must be an MPI communicator of size 1 3503 . bs - size of block 3504 . m - number of rows 3505 . n - number of columns 3506 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3507 . j - column indices 3508 - a - matrix values 3509 3510 Output Parameter: 3511 . mat - the matrix 3512 3513 Level: advanced 3514 3515 Notes: 3516 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3517 once the matrix is destroyed 3518 3519 You cannot set new nonzero locations into this matrix, that will generate an error. 3520 3521 The i and j indices are 0 based 3522 3523 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3524 3525 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3526 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3527 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3528 with column-major ordering within blocks. 3529 3530 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3531 3532 @*/ 3533 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3534 { 3535 PetscErrorCode ierr; 3536 PetscInt ii; 3537 Mat_SeqBAIJ *baij; 3538 3539 PetscFunctionBegin; 3540 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3541 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3542 3543 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3544 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3545 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3546 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3547 baij = (Mat_SeqBAIJ*)(*mat)->data; 3548 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3549 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3550 3551 baij->i = i; 3552 baij->j = j; 3553 baij->a = a; 3554 3555 baij->singlemalloc = PETSC_FALSE; 3556 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3557 baij->free_a = PETSC_FALSE; 3558 baij->free_ij = PETSC_FALSE; 3559 3560 for (ii=0; ii<m; ii++) { 3561 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3562 #if defined(PETSC_USE_DEBUG) 3563 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3564 #endif 3565 } 3566 #if defined(PETSC_USE_DEBUG) 3567 for (ii=0; ii<baij->i[m]; ii++) { 3568 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3569 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3570 } 3571 #endif 3572 3573 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3574 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3575 PetscFunctionReturn(0); 3576 } 3577 3578 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3579 { 3580 PetscErrorCode ierr; 3581 PetscMPIInt size; 3582 3583 PetscFunctionBegin; 3584 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3585 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3586 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3587 } else { 3588 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3589 } 3590 PetscFunctionReturn(0); 3591 } 3592