1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 953 } 954 if (N >= i) { 955 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 979 { 980 Mat A = *AA; 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 982 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 983 PetscInt *ai=a->i,*ailen=a->ilen; 984 PetscInt *aj=a->j,brow,bcol; 985 PetscInt ridx,cidx,lastcol = -1; 986 MatScalar *ap,value,*aa=a->a,*bap; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 for (k=0; k<m; k++) { /* loop over added rows */ 991 row = im[k]; brow = row/4; 992 rp = aj + ai[brow]; 993 ap = aa + 16*ai[brow]; 994 nrow = ailen[brow]; 995 low = 0; 996 high = nrow; 997 for (l=0; l<n; l++) { /* loop over added columns */ 998 col = in[l]; bcol = col/4; 999 ridx = row % 4; cidx = col % 4; 1000 value = v[l + k*n]; 1001 if (col <= lastcol) low = 0; 1002 else high = nrow; 1003 lastcol = col; 1004 while (high-low > 7) { 1005 t = (low+high)/2; 1006 if (rp[t] > bcol) high = t; 1007 else low = t; 1008 } 1009 for (i=low; i<high; i++) { 1010 if (rp[i] > bcol) break; 1011 if (rp[i] == bcol) { 1012 bap = ap + 16*i + 4*cidx + ridx; 1013 *bap += value; 1014 goto noinsert1; 1015 } 1016 } 1017 N = nrow++ - 1; 1018 high++; /* added new column thus must search to one higher than before */ 1019 /* shift up all the later entries in this row */ 1020 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1021 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1022 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt *diag,*ii = a->i,i; 1041 1042 PetscFunctionBegin; 1043 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1044 *missing = PETSC_FALSE; 1045 if (A->rmap->n > 0 && !ii) { 1046 *missing = PETSC_TRUE; 1047 if (d) *d = 0; 1048 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1049 } else { 1050 PetscInt n; 1051 n = PetscMin(a->mbs, a->nbs); 1052 diag = a->diag; 1053 for (i=0; i<n; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,j,m = a->mbs; 1070 1071 PetscFunctionBegin; 1072 if (!a->diag) { 1073 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1074 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1075 a->free_diag = PETSC_TRUE; 1076 } 1077 for (i=0; i<m; i++) { 1078 a->diag[i] = a->i[i+1]; 1079 for (j=a->i[i]; j<a->i[i+1]; j++) { 1080 if (a->j[j] == i) { 1081 a->diag[i] = j; 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = oshift; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (!ia) PetscFunctionReturn(0); 1179 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1180 ierr = PetscFree(*ia);CHKERRQ(ierr); 1181 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1187 { 1188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 #if defined(PETSC_USE_LOG) 1193 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1194 #endif 1195 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1198 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1200 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1206 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1207 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1208 1209 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1210 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1211 ierr = PetscFree(A->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_HYPRE) 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1235 { 1236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 switch (op) { 1241 case MAT_ROW_ORIENTED: 1242 a->roworiented = flg; 1243 break; 1244 case MAT_KEEP_NONZERO_PATTERN: 1245 a->keepnonzeropattern = flg; 1246 break; 1247 case MAT_NEW_NONZERO_LOCATIONS: 1248 a->nonew = (flg ? 0 : 1); 1249 break; 1250 case MAT_NEW_NONZERO_LOCATION_ERR: 1251 a->nonew = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1254 a->nonew = (flg ? -2 : 0); 1255 break; 1256 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1257 a->nounused = (flg ? -1 : 0); 1258 break; 1259 case MAT_NEW_DIAGONALS: 1260 case MAT_IGNORE_OFF_PROC_ENTRIES: 1261 case MAT_USE_HASH_TABLE: 1262 case MAT_SORTED_FULL: 1263 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1264 break; 1265 case MAT_SPD: 1266 case MAT_SYMMETRIC: 1267 case MAT_STRUCTURALLY_SYMMETRIC: 1268 case MAT_HERMITIAN: 1269 case MAT_SYMMETRY_ETERNAL: 1270 case MAT_SUBMAT_SINGLEIS: 1271 case MAT_STRUCTURE_ONLY: 1272 /* These options are handled directly by MatSetOption() */ 1273 break; 1274 default: 1275 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1281 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1282 { 1283 PetscErrorCode ierr; 1284 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1285 MatScalar *aa_i; 1286 PetscScalar *v_i; 1287 1288 PetscFunctionBegin; 1289 bs = A->rmap->bs; 1290 bs2 = bs*bs; 1291 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1292 1293 bn = row/bs; /* Block number */ 1294 bp = row % bs; /* Block Position */ 1295 M = ai[bn+1] - ai[bn]; 1296 *nz = bs*M; 1297 1298 if (v) { 1299 *v = 0; 1300 if (*nz) { 1301 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1302 for (i=0; i<M; i++) { /* for each block in the block row */ 1303 v_i = *v + i*bs; 1304 aa_i = aa + bs2*(ai[bn] + i); 1305 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1306 } 1307 } 1308 } 1309 1310 if (idx) { 1311 *idx = 0; 1312 if (*nz) { 1313 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1314 for (i=0; i<M; i++) { /* for each block in the block row */ 1315 idx_i = *idx + i*bs; 1316 itmp = bs*aj[ai[bn] + i]; 1317 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1318 } 1319 } 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1325 { 1326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1335 { 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1340 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1341 PetscFunctionReturn(0); 1342 } 1343 1344 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1345 { 1346 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1347 Mat C; 1348 PetscErrorCode ierr; 1349 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1350 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1351 MatScalar *ata,*aa=a->a; 1352 1353 PetscFunctionBegin; 1354 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1355 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1356 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1357 1358 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1359 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1360 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1361 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1362 1363 at = (Mat_SeqBAIJ*)C->data; 1364 ati = at->i; 1365 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1366 } else { 1367 C = *B; 1368 at = (Mat_SeqBAIJ*)C->data; 1369 ati = at->i; 1370 } 1371 1372 atj = at->j; 1373 ata = at->a; 1374 1375 /* Copy ati into atfill so we have locations of the next free space in atj */ 1376 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1377 1378 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1379 for (i=0; i<mbs; i++) { 1380 anzj = ai[i+1] - ai[i]; 1381 for (j=0; j<anzj; j++) { 1382 atj[atfill[*aj]] = i; 1383 for (kr=0; kr<bs; kr++) { 1384 for (k=0; k<bs; k++) { 1385 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1386 } 1387 } 1388 atfill[*aj++] += 1; 1389 } 1390 } 1391 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 1394 /* Clean up temporary space and complete requests. */ 1395 ierr = PetscFree(atfill);CHKERRQ(ierr); 1396 1397 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1398 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1399 *B = C; 1400 } else { 1401 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1407 { 1408 PetscErrorCode ierr; 1409 Mat Btrans; 1410 1411 PetscFunctionBegin; 1412 *f = PETSC_FALSE; 1413 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1414 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1415 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1420 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1421 { 1422 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1423 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1424 PetscInt *rowlens,*colidxs; 1425 PetscScalar *matvals; 1426 PetscErrorCode ierr; 1427 1428 PetscFunctionBegin; 1429 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1430 1431 M = mat->rmap->N; 1432 N = mat->cmap->N; 1433 m = mat->rmap->n; 1434 bs = mat->rmap->bs; 1435 nz = bs*bs*A->nz; 1436 1437 /* write matrix header */ 1438 header[0] = MAT_FILE_CLASSID; 1439 header[1] = M; header[2] = N; header[3] = nz; 1440 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1441 1442 /* store row lengths */ 1443 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1444 for (cnt=0, i=0; i<A->mbs; i++) 1445 for (j=0; j<bs; j++) 1446 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1447 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1448 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1449 1450 /* store column indices */ 1451 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1452 for (cnt=0, i=0; i<A->mbs; i++) 1453 for (k=0; k<bs; k++) 1454 for (j=A->i[i]; j<A->i[i+1]; j++) 1455 for (l=0; l<bs; l++) 1456 colidxs[cnt++] = bs*A->j[j] + l; 1457 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1458 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1459 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1460 1461 /* store nonzero values */ 1462 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1463 for (cnt=0, i=0; i<A->mbs; i++) 1464 for (k=0; k<bs; k++) 1465 for (j=A->i[i]; j<A->i[i+1]; j++) 1466 for (l=0; l<bs; l++) 1467 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1468 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1469 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1470 ierr = PetscFree(matvals);CHKERRQ(ierr); 1471 1472 /* write block size option to the viewer's .info file */ 1473 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1478 { 1479 PetscErrorCode ierr; 1480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1481 PetscInt i,bs = A->rmap->bs,k; 1482 1483 PetscFunctionBegin; 1484 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1485 for (i=0; i<a->mbs; i++) { 1486 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1487 for (k=a->i[i]; k<a->i[i+1]; k++) { 1488 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1489 } 1490 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1491 } 1492 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1493 PetscFunctionReturn(0); 1494 } 1495 1496 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1497 { 1498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1499 PetscErrorCode ierr; 1500 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1501 PetscViewerFormat format; 1502 1503 PetscFunctionBegin; 1504 if (A->structure_only) { 1505 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1506 PetscFunctionReturn(0); 1507 } 1508 1509 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1510 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1511 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1512 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1513 const char *matname; 1514 Mat aij; 1515 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1516 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1517 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1518 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1519 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1520 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1521 PetscFunctionReturn(0); 1522 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1523 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1524 for (i=0; i<a->mbs; i++) { 1525 for (j=0; j<bs; j++) { 1526 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1527 for (k=a->i[i]; k<a->i[i+1]; k++) { 1528 for (l=0; l<bs; l++) { 1529 #if defined(PETSC_USE_COMPLEX) 1530 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1531 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1532 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1533 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1534 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1535 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1536 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1537 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } 1539 #else 1540 if (a->a[bs2*k + l*bs + j] != 0.0) { 1541 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1542 } 1543 #endif 1544 } 1545 } 1546 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1547 } 1548 } 1549 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1550 } else { 1551 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1552 for (i=0; i<a->mbs; i++) { 1553 for (j=0; j<bs; j++) { 1554 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1555 for (k=a->i[i]; k<a->i[i+1]; k++) { 1556 for (l=0; l<bs; l++) { 1557 #if defined(PETSC_USE_COMPLEX) 1558 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1559 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1560 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1561 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1562 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1563 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1564 } else { 1565 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } 1567 #else 1568 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1569 #endif 1570 } 1571 } 1572 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1573 } 1574 } 1575 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1576 } 1577 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1578 PetscFunctionReturn(0); 1579 } 1580 1581 #include <petscdraw.h> 1582 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1583 { 1584 Mat A = (Mat) Aa; 1585 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1586 PetscErrorCode ierr; 1587 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1588 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1589 MatScalar *aa; 1590 PetscViewer viewer; 1591 PetscViewerFormat format; 1592 1593 PetscFunctionBegin; 1594 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1595 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1596 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1597 1598 /* loop over matrix elements drawing boxes */ 1599 1600 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1601 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1602 /* Blue for negative, Cyan for zero and Red for positive */ 1603 color = PETSC_DRAW_BLUE; 1604 for (i=0,row=0; i<mbs; i++,row+=bs) { 1605 for (j=a->i[i]; j<a->i[i+1]; j++) { 1606 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1607 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1608 aa = a->a + j*bs2; 1609 for (k=0; k<bs; k++) { 1610 for (l=0; l<bs; l++) { 1611 if (PetscRealPart(*aa++) >= 0.) continue; 1612 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1613 } 1614 } 1615 } 1616 } 1617 color = PETSC_DRAW_CYAN; 1618 for (i=0,row=0; i<mbs; i++,row+=bs) { 1619 for (j=a->i[i]; j<a->i[i+1]; j++) { 1620 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1621 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1622 aa = a->a + j*bs2; 1623 for (k=0; k<bs; k++) { 1624 for (l=0; l<bs; l++) { 1625 if (PetscRealPart(*aa++) != 0.) continue; 1626 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1627 } 1628 } 1629 } 1630 } 1631 color = PETSC_DRAW_RED; 1632 for (i=0,row=0; i<mbs; i++,row+=bs) { 1633 for (j=a->i[i]; j<a->i[i+1]; j++) { 1634 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1635 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1636 aa = a->a + j*bs2; 1637 for (k=0; k<bs; k++) { 1638 for (l=0; l<bs; l++) { 1639 if (PetscRealPart(*aa++) <= 0.) continue; 1640 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1641 } 1642 } 1643 } 1644 } 1645 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1646 } else { 1647 /* use contour shading to indicate magnitude of values */ 1648 /* first determine max of all nonzero values */ 1649 PetscReal minv = 0.0, maxv = 0.0; 1650 PetscDraw popup; 1651 1652 for (i=0; i<a->nz*a->bs2; i++) { 1653 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1654 } 1655 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1656 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1657 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1658 1659 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1660 for (i=0,row=0; i<mbs; i++,row+=bs) { 1661 for (j=a->i[i]; j<a->i[i+1]; j++) { 1662 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1663 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1664 aa = a->a + j*bs2; 1665 for (k=0; k<bs; k++) { 1666 for (l=0; l<bs; l++) { 1667 MatScalar v = *aa++; 1668 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1669 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1670 } 1671 } 1672 } 1673 } 1674 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1680 { 1681 PetscErrorCode ierr; 1682 PetscReal xl,yl,xr,yr,w,h; 1683 PetscDraw draw; 1684 PetscBool isnull; 1685 1686 PetscFunctionBegin; 1687 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1688 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1689 if (isnull) PetscFunctionReturn(0); 1690 1691 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1692 xr += w; yr += h; xl = -w; yl = -h; 1693 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1694 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1695 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1697 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1698 PetscFunctionReturn(0); 1699 } 1700 1701 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1702 { 1703 PetscErrorCode ierr; 1704 PetscBool iascii,isbinary,isdraw; 1705 1706 PetscFunctionBegin; 1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1708 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1709 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1710 if (iascii) { 1711 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1712 } else if (isbinary) { 1713 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1714 } else if (isdraw) { 1715 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1716 } else { 1717 Mat B; 1718 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1719 ierr = MatView(B,viewer);CHKERRQ(ierr); 1720 ierr = MatDestroy(&B);CHKERRQ(ierr); 1721 } 1722 PetscFunctionReturn(0); 1723 } 1724 1725 1726 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1727 { 1728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1729 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1730 PetscInt *ai = a->i,*ailen = a->ilen; 1731 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1732 MatScalar *ap,*aa = a->a; 1733 1734 PetscFunctionBegin; 1735 for (k=0; k<m; k++) { /* loop over rows */ 1736 row = im[k]; brow = row/bs; 1737 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1738 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1739 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1740 nrow = ailen[brow]; 1741 for (l=0; l<n; l++) { /* loop over columns */ 1742 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1743 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1744 col = in[l]; 1745 bcol = col/bs; 1746 cidx = col%bs; 1747 ridx = row%bs; 1748 high = nrow; 1749 low = 0; /* assume unsorted */ 1750 while (high-low > 5) { 1751 t = (low+high)/2; 1752 if (rp[t] > bcol) high = t; 1753 else low = t; 1754 } 1755 for (i=low; i<high; i++) { 1756 if (rp[i] > bcol) break; 1757 if (rp[i] == bcol) { 1758 *v++ = ap[bs2*i+bs*cidx+ridx]; 1759 goto finished; 1760 } 1761 } 1762 *v++ = 0.0; 1763 finished:; 1764 } 1765 } 1766 PetscFunctionReturn(0); 1767 } 1768 1769 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1770 { 1771 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1772 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1773 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1774 PetscErrorCode ierr; 1775 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1776 PetscBool roworiented=a->roworiented; 1777 const PetscScalar *value = v; 1778 MatScalar *ap=NULL,*aa = a->a,*bap; 1779 1780 PetscFunctionBegin; 1781 if (roworiented) { 1782 stepval = (n-1)*bs; 1783 } else { 1784 stepval = (m-1)*bs; 1785 } 1786 for (k=0; k<m; k++) { /* loop over added rows */ 1787 row = im[k]; 1788 if (row < 0) continue; 1789 #if defined(PETSC_USE_DEBUG) 1790 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1791 #endif 1792 rp = aj + ai[row]; 1793 if (!A->structure_only) ap = aa + bs2*ai[row]; 1794 rmax = imax[row]; 1795 nrow = ailen[row]; 1796 low = 0; 1797 high = nrow; 1798 for (l=0; l<n; l++) { /* loop over added columns */ 1799 if (in[l] < 0) continue; 1800 #if defined(PETSC_USE_DEBUG) 1801 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1802 #endif 1803 col = in[l]; 1804 if (!A->structure_only) { 1805 if (roworiented) { 1806 value = v + (k*(stepval+bs) + l)*bs; 1807 } else { 1808 value = v + (l*(stepval+bs) + k)*bs; 1809 } 1810 } 1811 if (col <= lastcol) low = 0; 1812 else high = nrow; 1813 lastcol = col; 1814 while (high-low > 7) { 1815 t = (low+high)/2; 1816 if (rp[t] > col) high = t; 1817 else low = t; 1818 } 1819 for (i=low; i<high; i++) { 1820 if (rp[i] > col) break; 1821 if (rp[i] == col) { 1822 if (A->structure_only) goto noinsert2; 1823 bap = ap + bs2*i; 1824 if (roworiented) { 1825 if (is == ADD_VALUES) { 1826 for (ii=0; ii<bs; ii++,value+=stepval) { 1827 for (jj=ii; jj<bs2; jj+=bs) { 1828 bap[jj] += *value++; 1829 } 1830 } 1831 } else { 1832 for (ii=0; ii<bs; ii++,value+=stepval) { 1833 for (jj=ii; jj<bs2; jj+=bs) { 1834 bap[jj] = *value++; 1835 } 1836 } 1837 } 1838 } else { 1839 if (is == ADD_VALUES) { 1840 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1841 for (jj=0; jj<bs; jj++) { 1842 bap[jj] += value[jj]; 1843 } 1844 bap += bs; 1845 } 1846 } else { 1847 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1848 for (jj=0; jj<bs; jj++) { 1849 bap[jj] = value[jj]; 1850 } 1851 bap += bs; 1852 } 1853 } 1854 } 1855 goto noinsert2; 1856 } 1857 } 1858 if (nonew == 1) goto noinsert2; 1859 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1860 if (A->structure_only) { 1861 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1862 } else { 1863 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1864 } 1865 N = nrow++ - 1; high++; 1866 /* shift up all the later entries in this row */ 1867 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1868 rp[i] = col; 1869 if (!A->structure_only) { 1870 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1871 bap = ap + bs2*i; 1872 if (roworiented) { 1873 for (ii=0; ii<bs; ii++,value+=stepval) { 1874 for (jj=ii; jj<bs2; jj+=bs) { 1875 bap[jj] = *value++; 1876 } 1877 } 1878 } else { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=0; jj<bs; jj++) { 1881 *bap++ = *value++; 1882 } 1883 } 1884 } 1885 } 1886 noinsert2:; 1887 low = i; 1888 } 1889 ailen[row] = nrow; 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1895 { 1896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1897 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1898 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1899 PetscErrorCode ierr; 1900 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1901 MatScalar *aa = a->a,*ap; 1902 PetscReal ratio=0.6; 1903 1904 PetscFunctionBegin; 1905 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1906 1907 if (m) rmax = ailen[0]; 1908 for (i=1; i<mbs; i++) { 1909 /* move each row back by the amount of empty slots (fshift) before it*/ 1910 fshift += imax[i-1] - ailen[i-1]; 1911 rmax = PetscMax(rmax,ailen[i]); 1912 if (fshift) { 1913 ip = aj + ai[i]; 1914 ap = aa + bs2*ai[i]; 1915 N = ailen[i]; 1916 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1917 if (!A->structure_only) { 1918 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1919 } 1920 } 1921 ai[i] = ai[i-1] + ailen[i-1]; 1922 } 1923 if (mbs) { 1924 fshift += imax[mbs-1] - ailen[mbs-1]; 1925 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1926 } 1927 1928 /* reset ilen and imax for each row */ 1929 a->nonzerorowcnt = 0; 1930 if (A->structure_only) { 1931 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1932 } else { /* !A->structure_only */ 1933 for (i=0; i<mbs; i++) { 1934 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1935 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1936 } 1937 } 1938 a->nz = ai[mbs]; 1939 1940 /* diagonals may have moved, so kill the diagonal pointers */ 1941 a->idiagvalid = PETSC_FALSE; 1942 if (fshift && a->diag) { 1943 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1944 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1945 a->diag = 0; 1946 } 1947 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1948 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1950 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1951 1952 A->info.mallocs += a->reallocs; 1953 a->reallocs = 0; 1954 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1955 a->rmax = rmax; 1956 1957 if (!A->structure_only) { 1958 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1959 } 1960 PetscFunctionReturn(0); 1961 } 1962 1963 /* 1964 This function returns an array of flags which indicate the locations of contiguous 1965 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1966 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1967 Assume: sizes should be long enough to hold all the values. 1968 */ 1969 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1970 { 1971 PetscInt i,j,k,row; 1972 PetscBool flg; 1973 1974 PetscFunctionBegin; 1975 for (i=0,j=0; i<n; j++) { 1976 row = idx[i]; 1977 if (row%bs!=0) { /* Not the begining of a block */ 1978 sizes[j] = 1; 1979 i++; 1980 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1981 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1982 i++; 1983 } else { /* Begining of the block, so check if the complete block exists */ 1984 flg = PETSC_TRUE; 1985 for (k=1; k<bs; k++) { 1986 if (row+k != idx[i+k]) { /* break in the block */ 1987 flg = PETSC_FALSE; 1988 break; 1989 } 1990 } 1991 if (flg) { /* No break in the bs */ 1992 sizes[j] = bs; 1993 i += bs; 1994 } else { 1995 sizes[j] = 1; 1996 i++; 1997 } 1998 } 1999 } 2000 *bs_max = j; 2001 PetscFunctionReturn(0); 2002 } 2003 2004 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2005 { 2006 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2007 PetscErrorCode ierr; 2008 PetscInt i,j,k,count,*rows; 2009 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2010 PetscScalar zero = 0.0; 2011 MatScalar *aa; 2012 const PetscScalar *xx; 2013 PetscScalar *bb; 2014 2015 PetscFunctionBegin; 2016 /* fix right hand side if needed */ 2017 if (x && b) { 2018 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2019 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2020 for (i=0; i<is_n; i++) { 2021 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2022 } 2023 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2024 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2025 } 2026 2027 /* Make a copy of the IS and sort it */ 2028 /* allocate memory for rows,sizes */ 2029 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2030 2031 /* copy IS values to rows, and sort them */ 2032 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2033 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2034 2035 if (baij->keepnonzeropattern) { 2036 for (i=0; i<is_n; i++) sizes[i] = 1; 2037 bs_max = is_n; 2038 } else { 2039 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2040 A->nonzerostate++; 2041 } 2042 2043 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2044 row = rows[j]; 2045 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2046 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2047 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2048 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2049 if (diag != (PetscScalar)0.0) { 2050 if (baij->ilen[row/bs] > 0) { 2051 baij->ilen[row/bs] = 1; 2052 baij->j[baij->i[row/bs]] = row/bs; 2053 2054 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2055 } 2056 /* Now insert all the diagonal values for this bs */ 2057 for (k=0; k<bs; k++) { 2058 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2059 } 2060 } else { /* (diag == 0.0) */ 2061 baij->ilen[row/bs] = 0; 2062 } /* end (diag == 0.0) */ 2063 } else { /* (sizes[i] != bs) */ 2064 #if defined(PETSC_USE_DEBUG) 2065 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2066 #endif 2067 for (k=0; k<count; k++) { 2068 aa[0] = zero; 2069 aa += bs; 2070 } 2071 if (diag != (PetscScalar)0.0) { 2072 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2073 } 2074 } 2075 } 2076 2077 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2083 { 2084 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2085 PetscErrorCode ierr; 2086 PetscInt i,j,k,count; 2087 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2088 PetscScalar zero = 0.0; 2089 MatScalar *aa; 2090 const PetscScalar *xx; 2091 PetscScalar *bb; 2092 PetscBool *zeroed,vecs = PETSC_FALSE; 2093 2094 PetscFunctionBegin; 2095 /* fix right hand side if needed */ 2096 if (x && b) { 2097 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2098 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2099 vecs = PETSC_TRUE; 2100 } 2101 2102 /* zero the columns */ 2103 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2104 for (i=0; i<is_n; i++) { 2105 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2106 zeroed[is_idx[i]] = PETSC_TRUE; 2107 } 2108 for (i=0; i<A->rmap->N; i++) { 2109 if (!zeroed[i]) { 2110 row = i/bs; 2111 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2112 for (k=0; k<bs; k++) { 2113 col = bs*baij->j[j] + k; 2114 if (zeroed[col]) { 2115 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2116 if (vecs) bb[i] -= aa[0]*xx[col]; 2117 aa[0] = 0.0; 2118 } 2119 } 2120 } 2121 } else if (vecs) bb[i] = diag*xx[i]; 2122 } 2123 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2124 if (vecs) { 2125 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2126 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2127 } 2128 2129 /* zero the rows */ 2130 for (i=0; i<is_n; i++) { 2131 row = is_idx[i]; 2132 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2133 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2134 for (k=0; k<count; k++) { 2135 aa[0] = zero; 2136 aa += bs; 2137 } 2138 if (diag != (PetscScalar)0.0) { 2139 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2140 } 2141 } 2142 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2147 { 2148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2149 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2150 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2151 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2152 PetscErrorCode ierr; 2153 PetscInt ridx,cidx,bs2=a->bs2; 2154 PetscBool roworiented=a->roworiented; 2155 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2156 2157 PetscFunctionBegin; 2158 for (k=0; k<m; k++) { /* loop over added rows */ 2159 row = im[k]; 2160 brow = row/bs; 2161 if (row < 0) continue; 2162 #if defined(PETSC_USE_DEBUG) 2163 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2164 #endif 2165 rp = aj + ai[brow]; 2166 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2167 rmax = imax[brow]; 2168 nrow = ailen[brow]; 2169 low = 0; 2170 high = nrow; 2171 for (l=0; l<n; l++) { /* loop over added columns */ 2172 if (in[l] < 0) continue; 2173 #if defined(PETSC_USE_DEBUG) 2174 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2175 #endif 2176 col = in[l]; bcol = col/bs; 2177 ridx = row % bs; cidx = col % bs; 2178 if (!A->structure_only) { 2179 if (roworiented) { 2180 value = v[l + k*n]; 2181 } else { 2182 value = v[k + l*m]; 2183 } 2184 } 2185 if (col <= lastcol) low = 0; else high = nrow; 2186 lastcol = col; 2187 while (high-low > 7) { 2188 t = (low+high)/2; 2189 if (rp[t] > bcol) high = t; 2190 else low = t; 2191 } 2192 for (i=low; i<high; i++) { 2193 if (rp[i] > bcol) break; 2194 if (rp[i] == bcol) { 2195 bap = ap + bs2*i + bs*cidx + ridx; 2196 if (!A->structure_only) { 2197 if (is == ADD_VALUES) *bap += value; 2198 else *bap = value; 2199 } 2200 goto noinsert1; 2201 } 2202 } 2203 if (nonew == 1) goto noinsert1; 2204 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2205 if (A->structure_only) { 2206 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2207 } else { 2208 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2209 } 2210 N = nrow++ - 1; high++; 2211 /* shift up all the later entries in this row */ 2212 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2213 rp[i] = bcol; 2214 if (!A->structure_only) { 2215 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2216 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2217 ap[bs2*i + bs*cidx + ridx] = value; 2218 } 2219 a->nz++; 2220 A->nonzerostate++; 2221 noinsert1:; 2222 low = i; 2223 } 2224 ailen[brow] = nrow; 2225 } 2226 PetscFunctionReturn(0); 2227 } 2228 2229 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2230 { 2231 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2232 Mat outA; 2233 PetscErrorCode ierr; 2234 PetscBool row_identity,col_identity; 2235 2236 PetscFunctionBegin; 2237 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2238 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2239 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2240 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2241 2242 outA = inA; 2243 inA->factortype = MAT_FACTOR_LU; 2244 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2245 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2246 2247 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2248 2249 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2250 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2251 a->row = row; 2252 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2254 a->col = col; 2255 2256 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2257 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2258 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2259 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2260 2261 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2262 if (!a->solve_work) { 2263 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2264 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2265 } 2266 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2271 { 2272 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2273 PetscInt i,nz,mbs; 2274 2275 PetscFunctionBegin; 2276 nz = baij->maxnz; 2277 mbs = baij->mbs; 2278 for (i=0; i<nz; i++) { 2279 baij->j[i] = indices[i]; 2280 } 2281 baij->nz = nz; 2282 for (i=0; i<mbs; i++) { 2283 baij->ilen[i] = baij->imax[i]; 2284 } 2285 PetscFunctionReturn(0); 2286 } 2287 2288 /*@ 2289 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2290 in the matrix. 2291 2292 Input Parameters: 2293 + mat - the SeqBAIJ matrix 2294 - indices - the column indices 2295 2296 Level: advanced 2297 2298 Notes: 2299 This can be called if you have precomputed the nonzero structure of the 2300 matrix and want to provide it to the matrix object to improve the performance 2301 of the MatSetValues() operation. 2302 2303 You MUST have set the correct numbers of nonzeros per row in the call to 2304 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2305 2306 MUST be called before any calls to MatSetValues(); 2307 2308 @*/ 2309 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2310 { 2311 PetscErrorCode ierr; 2312 2313 PetscFunctionBegin; 2314 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2315 PetscValidPointer(indices,2); 2316 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2317 PetscFunctionReturn(0); 2318 } 2319 2320 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2321 { 2322 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2323 PetscErrorCode ierr; 2324 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2325 PetscReal atmp; 2326 PetscScalar *x,zero = 0.0; 2327 MatScalar *aa; 2328 PetscInt ncols,brow,krow,kcol; 2329 2330 PetscFunctionBegin; 2331 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2332 bs = A->rmap->bs; 2333 aa = a->a; 2334 ai = a->i; 2335 aj = a->j; 2336 mbs = a->mbs; 2337 2338 ierr = VecSet(v,zero);CHKERRQ(ierr); 2339 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2340 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2341 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2342 for (i=0; i<mbs; i++) { 2343 ncols = ai[1] - ai[0]; ai++; 2344 brow = bs*i; 2345 for (j=0; j<ncols; j++) { 2346 for (kcol=0; kcol<bs; kcol++) { 2347 for (krow=0; krow<bs; krow++) { 2348 atmp = PetscAbsScalar(*aa);aa++; 2349 row = brow + krow; /* row index */ 2350 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2351 } 2352 } 2353 aj++; 2354 } 2355 } 2356 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2361 { 2362 PetscErrorCode ierr; 2363 2364 PetscFunctionBegin; 2365 /* If the two matrices have the same copy implementation, use fast copy. */ 2366 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2367 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2368 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2369 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2370 2371 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2372 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2373 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2374 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2375 } else { 2376 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2377 } 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2382 { 2383 PetscErrorCode ierr; 2384 2385 PetscFunctionBegin; 2386 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2391 { 2392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2393 2394 PetscFunctionBegin; 2395 *array = a->a; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2400 { 2401 PetscFunctionBegin; 2402 *array = NULL; 2403 PetscFunctionReturn(0); 2404 } 2405 2406 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2407 { 2408 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2409 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2410 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2411 PetscErrorCode ierr; 2412 2413 PetscFunctionBegin; 2414 /* Set the number of nonzeros in the new matrix */ 2415 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2420 { 2421 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2422 PetscErrorCode ierr; 2423 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2424 PetscBLASInt one=1; 2425 2426 PetscFunctionBegin; 2427 if (str == SAME_NONZERO_PATTERN) { 2428 PetscScalar alpha = a; 2429 PetscBLASInt bnz; 2430 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2431 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2432 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2433 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2434 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2435 } else { 2436 Mat B; 2437 PetscInt *nnz; 2438 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2439 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2440 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2441 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2442 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2443 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2444 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2445 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2446 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2447 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2448 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2449 ierr = PetscFree(nnz);CHKERRQ(ierr); 2450 } 2451 PetscFunctionReturn(0); 2452 } 2453 2454 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2455 { 2456 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2457 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2458 MatScalar *aa = a->a; 2459 2460 PetscFunctionBegin; 2461 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2466 { 2467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2468 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2469 MatScalar *aa = a->a; 2470 2471 PetscFunctionBegin; 2472 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 /* 2477 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2478 */ 2479 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2480 { 2481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2482 PetscErrorCode ierr; 2483 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2484 PetscInt nz = a->i[m],row,*jj,mr,col; 2485 2486 PetscFunctionBegin; 2487 *nn = n; 2488 if (!ia) PetscFunctionReturn(0); 2489 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2490 else { 2491 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2492 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2493 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2494 jj = a->j; 2495 for (i=0; i<nz; i++) { 2496 collengths[jj[i]]++; 2497 } 2498 cia[0] = oshift; 2499 for (i=0; i<n; i++) { 2500 cia[i+1] = cia[i] + collengths[i]; 2501 } 2502 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2503 jj = a->j; 2504 for (row=0; row<m; row++) { 2505 mr = a->i[row+1] - a->i[row]; 2506 for (i=0; i<mr; i++) { 2507 col = *jj++; 2508 2509 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2510 } 2511 } 2512 ierr = PetscFree(collengths);CHKERRQ(ierr); 2513 *ia = cia; *ja = cja; 2514 } 2515 PetscFunctionReturn(0); 2516 } 2517 2518 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2519 { 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 if (!ia) PetscFunctionReturn(0); 2524 ierr = PetscFree(*ia);CHKERRQ(ierr); 2525 ierr = PetscFree(*ja);CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 /* 2530 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2531 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2532 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2533 */ 2534 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2535 { 2536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2537 PetscErrorCode ierr; 2538 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2539 PetscInt nz = a->i[m],row,*jj,mr,col; 2540 PetscInt *cspidx; 2541 2542 PetscFunctionBegin; 2543 *nn = n; 2544 if (!ia) PetscFunctionReturn(0); 2545 2546 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2547 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2549 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2550 jj = a->j; 2551 for (i=0; i<nz; i++) { 2552 collengths[jj[i]]++; 2553 } 2554 cia[0] = oshift; 2555 for (i=0; i<n; i++) { 2556 cia[i+1] = cia[i] + collengths[i]; 2557 } 2558 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2559 jj = a->j; 2560 for (row=0; row<m; row++) { 2561 mr = a->i[row+1] - a->i[row]; 2562 for (i=0; i<mr; i++) { 2563 col = *jj++; 2564 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2565 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2566 } 2567 } 2568 ierr = PetscFree(collengths);CHKERRQ(ierr); 2569 *ia = cia; 2570 *ja = cja; 2571 *spidx = cspidx; 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2576 { 2577 PetscErrorCode ierr; 2578 2579 PetscFunctionBegin; 2580 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2581 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2586 { 2587 PetscErrorCode ierr; 2588 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2589 2590 PetscFunctionBegin; 2591 if (!Y->preallocated || !aij->nz) { 2592 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2593 } 2594 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 /* -------------------------------------------------------------------*/ 2599 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2600 MatGetRow_SeqBAIJ, 2601 MatRestoreRow_SeqBAIJ, 2602 MatMult_SeqBAIJ_N, 2603 /* 4*/ MatMultAdd_SeqBAIJ_N, 2604 MatMultTranspose_SeqBAIJ, 2605 MatMultTransposeAdd_SeqBAIJ, 2606 0, 2607 0, 2608 0, 2609 /* 10*/ 0, 2610 MatLUFactor_SeqBAIJ, 2611 0, 2612 0, 2613 MatTranspose_SeqBAIJ, 2614 /* 15*/ MatGetInfo_SeqBAIJ, 2615 MatEqual_SeqBAIJ, 2616 MatGetDiagonal_SeqBAIJ, 2617 MatDiagonalScale_SeqBAIJ, 2618 MatNorm_SeqBAIJ, 2619 /* 20*/ 0, 2620 MatAssemblyEnd_SeqBAIJ, 2621 MatSetOption_SeqBAIJ, 2622 MatZeroEntries_SeqBAIJ, 2623 /* 24*/ MatZeroRows_SeqBAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /* 29*/ MatSetUp_SeqBAIJ, 2629 0, 2630 0, 2631 0, 2632 0, 2633 /* 34*/ MatDuplicate_SeqBAIJ, 2634 0, 2635 0, 2636 MatILUFactor_SeqBAIJ, 2637 0, 2638 /* 39*/ MatAXPY_SeqBAIJ, 2639 MatCreateSubMatrices_SeqBAIJ, 2640 MatIncreaseOverlap_SeqBAIJ, 2641 MatGetValues_SeqBAIJ, 2642 MatCopy_SeqBAIJ, 2643 /* 44*/ 0, 2644 MatScale_SeqBAIJ, 2645 MatShift_SeqBAIJ, 2646 0, 2647 MatZeroRowsColumns_SeqBAIJ, 2648 /* 49*/ 0, 2649 MatGetRowIJ_SeqBAIJ, 2650 MatRestoreRowIJ_SeqBAIJ, 2651 MatGetColumnIJ_SeqBAIJ, 2652 MatRestoreColumnIJ_SeqBAIJ, 2653 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2654 0, 2655 0, 2656 0, 2657 MatSetValuesBlocked_SeqBAIJ, 2658 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2659 MatDestroy_SeqBAIJ, 2660 MatView_SeqBAIJ, 2661 0, 2662 0, 2663 /* 64*/ 0, 2664 0, 2665 0, 2666 0, 2667 0, 2668 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2669 0, 2670 MatConvert_Basic, 2671 0, 2672 0, 2673 /* 74*/ 0, 2674 MatFDColoringApply_BAIJ, 2675 0, 2676 0, 2677 0, 2678 /* 79*/ 0, 2679 0, 2680 0, 2681 0, 2682 MatLoad_SeqBAIJ, 2683 /* 84*/ 0, 2684 0, 2685 0, 2686 0, 2687 0, 2688 /* 89*/ 0, 2689 0, 2690 0, 2691 0, 2692 0, 2693 /* 94*/ 0, 2694 0, 2695 0, 2696 0, 2697 0, 2698 /* 99*/ 0, 2699 0, 2700 0, 2701 0, 2702 0, 2703 /*104*/ 0, 2704 MatRealPart_SeqBAIJ, 2705 MatImaginaryPart_SeqBAIJ, 2706 0, 2707 0, 2708 /*109*/ 0, 2709 0, 2710 0, 2711 0, 2712 MatMissingDiagonal_SeqBAIJ, 2713 /*114*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /*119*/ 0, 2719 0, 2720 MatMultHermitianTranspose_SeqBAIJ, 2721 MatMultHermitianTransposeAdd_SeqBAIJ, 2722 0, 2723 /*124*/ 0, 2724 0, 2725 MatInvertBlockDiagonal_SeqBAIJ, 2726 0, 2727 0, 2728 /*129*/ 0, 2729 0, 2730 0, 2731 0, 2732 0, 2733 /*134*/ 0, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /*139*/ MatSetBlockSizes_Default, 2739 0, 2740 0, 2741 MatFDColoringSetUp_SeqXAIJ, 2742 0, 2743 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2744 MatDestroySubMatrices_SeqBAIJ 2745 }; 2746 2747 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2748 { 2749 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2750 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2751 PetscErrorCode ierr; 2752 2753 PetscFunctionBegin; 2754 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2755 2756 /* allocate space for values if not already there */ 2757 if (!aij->saved_values) { 2758 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2759 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2760 } 2761 2762 /* copy values over */ 2763 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2768 { 2769 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2770 PetscErrorCode ierr; 2771 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2772 2773 PetscFunctionBegin; 2774 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2775 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2776 2777 /* copy values over */ 2778 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2779 PetscFunctionReturn(0); 2780 } 2781 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2784 2785 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2786 { 2787 Mat_SeqBAIJ *b; 2788 PetscErrorCode ierr; 2789 PetscInt i,mbs,nbs,bs2; 2790 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2791 2792 PetscFunctionBegin; 2793 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2794 if (nz == MAT_SKIP_ALLOCATION) { 2795 skipallocation = PETSC_TRUE; 2796 nz = 0; 2797 } 2798 2799 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2800 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2802 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2803 2804 B->preallocated = PETSC_TRUE; 2805 2806 mbs = B->rmap->n/bs; 2807 nbs = B->cmap->n/bs; 2808 bs2 = bs*bs; 2809 2810 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2811 2812 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2813 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2814 if (nnz) { 2815 for (i=0; i<mbs; i++) { 2816 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2817 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2818 } 2819 } 2820 2821 b = (Mat_SeqBAIJ*)B->data; 2822 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2823 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2824 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2825 2826 if (!flg) { 2827 switch (bs) { 2828 case 1: 2829 B->ops->mult = MatMult_SeqBAIJ_1; 2830 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2831 break; 2832 case 2: 2833 B->ops->mult = MatMult_SeqBAIJ_2; 2834 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2835 break; 2836 case 3: 2837 B->ops->mult = MatMult_SeqBAIJ_3; 2838 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2839 break; 2840 case 4: 2841 B->ops->mult = MatMult_SeqBAIJ_4; 2842 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2843 break; 2844 case 5: 2845 B->ops->mult = MatMult_SeqBAIJ_5; 2846 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2847 break; 2848 case 6: 2849 B->ops->mult = MatMult_SeqBAIJ_6; 2850 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2851 break; 2852 case 7: 2853 B->ops->mult = MatMult_SeqBAIJ_7; 2854 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2855 break; 2856 case 9: 2857 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2858 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2859 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2860 #else 2861 B->ops->mult = MatMult_SeqBAIJ_N; 2862 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2863 #endif 2864 break; 2865 case 11: 2866 B->ops->mult = MatMult_SeqBAIJ_11; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2868 break; 2869 case 15: 2870 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2872 break; 2873 default: 2874 B->ops->mult = MatMult_SeqBAIJ_N; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2876 break; 2877 } 2878 } 2879 B->ops->sor = MatSOR_SeqBAIJ; 2880 b->mbs = mbs; 2881 b->nbs = nbs; 2882 if (!skipallocation) { 2883 if (!b->imax) { 2884 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2885 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2886 2887 b->free_imax_ilen = PETSC_TRUE; 2888 } 2889 /* b->ilen will count nonzeros in each block row so far. */ 2890 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2891 if (!nnz) { 2892 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2893 else if (nz < 0) nz = 1; 2894 nz = PetscMin(nz,nbs); 2895 for (i=0; i<mbs; i++) b->imax[i] = nz; 2896 nz = nz*mbs; 2897 } else { 2898 PetscInt64 nz64 = 0; 2899 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2900 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2901 } 2902 2903 /* allocate the matrix space */ 2904 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2905 if (B->structure_only) { 2906 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2907 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2908 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2909 } else { 2910 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2911 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2912 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2913 } 2914 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2915 2916 if (B->structure_only) { 2917 b->singlemalloc = PETSC_FALSE; 2918 b->free_a = PETSC_FALSE; 2919 } else { 2920 b->singlemalloc = PETSC_TRUE; 2921 b->free_a = PETSC_TRUE; 2922 } 2923 b->free_ij = PETSC_TRUE; 2924 2925 b->i[0] = 0; 2926 for (i=1; i<mbs+1; i++) { 2927 b->i[i] = b->i[i-1] + b->imax[i-1]; 2928 } 2929 2930 } else { 2931 b->free_a = PETSC_FALSE; 2932 b->free_ij = PETSC_FALSE; 2933 } 2934 2935 b->bs2 = bs2; 2936 b->mbs = mbs; 2937 b->nz = 0; 2938 b->maxnz = nz; 2939 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2940 B->was_assembled = PETSC_FALSE; 2941 B->assembled = PETSC_FALSE; 2942 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2943 PetscFunctionReturn(0); 2944 } 2945 2946 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2947 { 2948 PetscInt i,m,nz,nz_max=0,*nnz; 2949 PetscScalar *values=0; 2950 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2951 PetscErrorCode ierr; 2952 2953 PetscFunctionBegin; 2954 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2955 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2960 m = B->rmap->n/bs; 2961 2962 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2963 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2964 for (i=0; i<m; i++) { 2965 nz = ii[i+1]- ii[i]; 2966 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2967 nz_max = PetscMax(nz_max, nz); 2968 nnz[i] = nz; 2969 } 2970 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2971 ierr = PetscFree(nnz);CHKERRQ(ierr); 2972 2973 values = (PetscScalar*)V; 2974 if (!values) { 2975 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2976 } 2977 for (i=0; i<m; i++) { 2978 PetscInt ncols = ii[i+1] - ii[i]; 2979 const PetscInt *icols = jj + ii[i]; 2980 if (bs == 1 || !roworiented) { 2981 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2982 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2983 } else { 2984 PetscInt j; 2985 for (j=0; j<ncols; j++) { 2986 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2987 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2988 } 2989 } 2990 } 2991 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2992 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2994 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2995 PetscFunctionReturn(0); 2996 } 2997 2998 /*@C 2999 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3000 3001 Not Collective 3002 3003 Input Parameter: 3004 . mat - a MATSEQBAIJ matrix 3005 3006 Output Parameter: 3007 . array - pointer to the data 3008 3009 Level: intermediate 3010 3011 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3012 @*/ 3013 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3014 { 3015 PetscErrorCode ierr; 3016 3017 PetscFunctionBegin; 3018 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3019 PetscFunctionReturn(0); 3020 } 3021 3022 /*@C 3023 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3024 3025 Not Collective 3026 3027 Input Parameters: 3028 + mat - a MATSEQBAIJ matrix 3029 - array - pointer to the data 3030 3031 Level: intermediate 3032 3033 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3034 @*/ 3035 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3036 { 3037 PetscErrorCode ierr; 3038 3039 PetscFunctionBegin; 3040 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3041 PetscFunctionReturn(0); 3042 } 3043 3044 /*MC 3045 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3046 block sparse compressed row format. 3047 3048 Options Database Keys: 3049 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3050 3051 Level: beginner 3052 3053 Notes: 3054 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3055 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3056 3057 .seealso: MatCreateSeqBAIJ() 3058 M*/ 3059 3060 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3061 3062 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3063 { 3064 PetscErrorCode ierr; 3065 PetscMPIInt size; 3066 Mat_SeqBAIJ *b; 3067 3068 PetscFunctionBegin; 3069 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3070 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3071 3072 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3073 B->data = (void*)b; 3074 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3075 3076 b->row = 0; 3077 b->col = 0; 3078 b->icol = 0; 3079 b->reallocs = 0; 3080 b->saved_values = 0; 3081 3082 b->roworiented = PETSC_TRUE; 3083 b->nonew = 0; 3084 b->diag = 0; 3085 B->spptr = 0; 3086 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3087 b->keepnonzeropattern = PETSC_FALSE; 3088 3089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3099 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3100 #if defined(PETSC_HAVE_HYPRE) 3101 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3102 #endif 3103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3104 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3105 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3106 PetscFunctionReturn(0); 3107 } 3108 3109 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3110 { 3111 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3112 PetscErrorCode ierr; 3113 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3114 3115 PetscFunctionBegin; 3116 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3117 3118 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3119 c->imax = a->imax; 3120 c->ilen = a->ilen; 3121 c->free_imax_ilen = PETSC_FALSE; 3122 } else { 3123 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3124 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3125 for (i=0; i<mbs; i++) { 3126 c->imax[i] = a->imax[i]; 3127 c->ilen[i] = a->ilen[i]; 3128 } 3129 c->free_imax_ilen = PETSC_TRUE; 3130 } 3131 3132 /* allocate the matrix space */ 3133 if (mallocmatspace) { 3134 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3135 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3136 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3137 3138 c->i = a->i; 3139 c->j = a->j; 3140 c->singlemalloc = PETSC_FALSE; 3141 c->free_a = PETSC_TRUE; 3142 c->free_ij = PETSC_FALSE; 3143 c->parent = A; 3144 C->preallocated = PETSC_TRUE; 3145 C->assembled = PETSC_TRUE; 3146 3147 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3148 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3149 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3150 } else { 3151 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3152 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3153 3154 c->singlemalloc = PETSC_TRUE; 3155 c->free_a = PETSC_TRUE; 3156 c->free_ij = PETSC_TRUE; 3157 3158 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3159 if (mbs > 0) { 3160 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3161 if (cpvalues == MAT_COPY_VALUES) { 3162 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3163 } else { 3164 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3165 } 3166 } 3167 C->preallocated = PETSC_TRUE; 3168 C->assembled = PETSC_TRUE; 3169 } 3170 } 3171 3172 c->roworiented = a->roworiented; 3173 c->nonew = a->nonew; 3174 3175 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3176 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3177 3178 c->bs2 = a->bs2; 3179 c->mbs = a->mbs; 3180 c->nbs = a->nbs; 3181 3182 if (a->diag) { 3183 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3184 c->diag = a->diag; 3185 c->free_diag = PETSC_FALSE; 3186 } else { 3187 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3188 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3189 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3190 c->free_diag = PETSC_TRUE; 3191 } 3192 } else c->diag = 0; 3193 3194 c->nz = a->nz; 3195 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3196 c->solve_work = NULL; 3197 c->mult_work = NULL; 3198 c->sor_workt = NULL; 3199 c->sor_work = NULL; 3200 3201 c->compressedrow.use = a->compressedrow.use; 3202 c->compressedrow.nrows = a->compressedrow.nrows; 3203 if (a->compressedrow.use) { 3204 i = a->compressedrow.nrows; 3205 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3206 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3207 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3208 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3209 } else { 3210 c->compressedrow.use = PETSC_FALSE; 3211 c->compressedrow.i = NULL; 3212 c->compressedrow.rindex = NULL; 3213 } 3214 C->nonzerostate = A->nonzerostate; 3215 3216 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3217 PetscFunctionReturn(0); 3218 } 3219 3220 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3221 { 3222 PetscErrorCode ierr; 3223 3224 PetscFunctionBegin; 3225 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3226 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3227 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3228 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3233 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3234 { 3235 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3236 PetscInt *rowidxs,*colidxs; 3237 PetscScalar *matvals; 3238 PetscErrorCode ierr; 3239 3240 PetscFunctionBegin; 3241 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3242 3243 /* read matrix header */ 3244 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3245 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3246 M = header[1]; N = header[2]; nz = header[3]; 3247 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3248 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3249 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3250 3251 /* set block sizes from the viewer's .info file */ 3252 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3253 /* set local and global sizes if not set already */ 3254 if (mat->rmap->n < 0) mat->rmap->n = M; 3255 if (mat->cmap->n < 0) mat->cmap->n = N; 3256 if (mat->rmap->N < 0) mat->rmap->N = M; 3257 if (mat->cmap->N < 0) mat->cmap->N = N; 3258 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3259 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3260 3261 /* check if the matrix sizes are correct */ 3262 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3263 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3264 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3265 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3266 mbs = m/bs; nbs = n/bs; 3267 3268 /* read in row lengths, column indices and nonzero values */ 3269 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3270 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3271 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3272 sum = rowidxs[m]; 3273 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3274 3275 /* read in column indices and nonzero values */ 3276 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3277 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3278 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3279 3280 { /* preallocate matrix storage */ 3281 PetscBT bt; /* helper bit set to count nonzeros */ 3282 PetscInt *nnz; 3283 PetscBool sbaij; 3284 3285 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3286 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3287 ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr); 3288 for (i=0; i<mbs; i++) { 3289 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3290 for (k=0; k<bs; k++) { 3291 PetscInt row = bs*i + k; 3292 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3293 PetscInt col = colidxs[j]; 3294 if (!sbaij || col >= row) 3295 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3296 } 3297 } 3298 } 3299 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3300 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3301 ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3302 ierr = PetscFree(nnz);CHKERRQ(ierr); 3303 } 3304 3305 /* store matrix values */ 3306 for (i=0; i<m; i++) { 3307 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3308 ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3309 } 3310 3311 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3312 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3313 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3314 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3315 PetscFunctionReturn(0); 3316 } 3317 3318 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3319 { 3320 PetscErrorCode ierr; 3321 PetscBool isbinary; 3322 3323 PetscFunctionBegin; 3324 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3325 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3326 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3327 PetscFunctionReturn(0); 3328 } 3329 3330 /*@C 3331 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3332 compressed row) format. For good matrix assembly performance the 3333 user should preallocate the matrix storage by setting the parameter nz 3334 (or the array nnz). By setting these parameters accurately, performance 3335 during matrix assembly can be increased by more than a factor of 50. 3336 3337 Collective 3338 3339 Input Parameters: 3340 + comm - MPI communicator, set to PETSC_COMM_SELF 3341 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3342 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3343 . m - number of rows 3344 . n - number of columns 3345 . nz - number of nonzero blocks per block row (same for all rows) 3346 - nnz - array containing the number of nonzero blocks in the various block rows 3347 (possibly different for each block row) or NULL 3348 3349 Output Parameter: 3350 . A - the matrix 3351 3352 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3353 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3354 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3355 3356 Options Database Keys: 3357 + -mat_no_unroll - uses code that does not unroll the loops in the 3358 block calculations (much slower) 3359 - -mat_block_size - size of the blocks to use 3360 3361 Level: intermediate 3362 3363 Notes: 3364 The number of rows and columns must be divisible by blocksize. 3365 3366 If the nnz parameter is given then the nz parameter is ignored 3367 3368 A nonzero block is any block that as 1 or more nonzeros in it 3369 3370 The block AIJ format is fully compatible with standard Fortran 77 3371 storage. That is, the stored row and column indices can begin at 3372 either one (as in Fortran) or zero. See the users' manual for details. 3373 3374 Specify the preallocated storage with either nz or nnz (not both). 3375 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3376 allocation. See Users-Manual: ch_mat for details. 3377 matrices. 3378 3379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3380 @*/ 3381 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3382 { 3383 PetscErrorCode ierr; 3384 3385 PetscFunctionBegin; 3386 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3387 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3388 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3389 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3390 PetscFunctionReturn(0); 3391 } 3392 3393 /*@C 3394 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3395 per row in the matrix. For good matrix assembly performance the 3396 user should preallocate the matrix storage by setting the parameter nz 3397 (or the array nnz). By setting these parameters accurately, performance 3398 during matrix assembly can be increased by more than a factor of 50. 3399 3400 Collective 3401 3402 Input Parameters: 3403 + B - the matrix 3404 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3405 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3406 . nz - number of block nonzeros per block row (same for all rows) 3407 - nnz - array containing the number of block nonzeros in the various block rows 3408 (possibly different for each block row) or NULL 3409 3410 Options Database Keys: 3411 + -mat_no_unroll - uses code that does not unroll the loops in the 3412 block calculations (much slower) 3413 - -mat_block_size - size of the blocks to use 3414 3415 Level: intermediate 3416 3417 Notes: 3418 If the nnz parameter is given then the nz parameter is ignored 3419 3420 You can call MatGetInfo() to get information on how effective the preallocation was; 3421 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3422 You can also run with the option -info and look for messages with the string 3423 malloc in them to see if additional memory allocation was needed. 3424 3425 The block AIJ format is fully compatible with standard Fortran 77 3426 storage. That is, the stored row and column indices can begin at 3427 either one (as in Fortran) or zero. See the users' manual for details. 3428 3429 Specify the preallocated storage with either nz or nnz (not both). 3430 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3431 allocation. See Users-Manual: ch_mat for details. 3432 3433 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3434 @*/ 3435 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3436 { 3437 PetscErrorCode ierr; 3438 3439 PetscFunctionBegin; 3440 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3441 PetscValidType(B,1); 3442 PetscValidLogicalCollectiveInt(B,bs,2); 3443 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /*@C 3448 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3449 3450 Collective 3451 3452 Input Parameters: 3453 + B - the matrix 3454 . i - the indices into j for the start of each local row (starts with zero) 3455 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3456 - v - optional values in the matrix 3457 3458 Level: advanced 3459 3460 Notes: 3461 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3462 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3463 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3464 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3465 block column and the second index is over columns within a block. 3466 3467 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3468 3469 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3470 @*/ 3471 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3472 { 3473 PetscErrorCode ierr; 3474 3475 PetscFunctionBegin; 3476 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3477 PetscValidType(B,1); 3478 PetscValidLogicalCollectiveInt(B,bs,2); 3479 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 3484 /*@ 3485 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3486 3487 Collective 3488 3489 Input Parameters: 3490 + comm - must be an MPI communicator of size 1 3491 . bs - size of block 3492 . m - number of rows 3493 . n - number of columns 3494 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3495 . j - column indices 3496 - a - matrix values 3497 3498 Output Parameter: 3499 . mat - the matrix 3500 3501 Level: advanced 3502 3503 Notes: 3504 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3505 once the matrix is destroyed 3506 3507 You cannot set new nonzero locations into this matrix, that will generate an error. 3508 3509 The i and j indices are 0 based 3510 3511 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3512 3513 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3514 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3515 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3516 with column-major ordering within blocks. 3517 3518 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3519 3520 @*/ 3521 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3522 { 3523 PetscErrorCode ierr; 3524 PetscInt ii; 3525 Mat_SeqBAIJ *baij; 3526 3527 PetscFunctionBegin; 3528 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3529 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3530 3531 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3532 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3533 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3534 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3535 baij = (Mat_SeqBAIJ*)(*mat)->data; 3536 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3537 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3538 3539 baij->i = i; 3540 baij->j = j; 3541 baij->a = a; 3542 3543 baij->singlemalloc = PETSC_FALSE; 3544 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3545 baij->free_a = PETSC_FALSE; 3546 baij->free_ij = PETSC_FALSE; 3547 3548 for (ii=0; ii<m; ii++) { 3549 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3550 #if defined(PETSC_USE_DEBUG) 3551 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3552 #endif 3553 } 3554 #if defined(PETSC_USE_DEBUG) 3555 for (ii=0; ii<baij->i[m]; ii++) { 3556 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3557 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3558 } 3559 #endif 3560 3561 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3563 PetscFunctionReturn(0); 3564 } 3565 3566 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3567 { 3568 PetscErrorCode ierr; 3569 PetscMPIInt size; 3570 3571 PetscFunctionBegin; 3572 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3573 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3574 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3575 } else { 3576 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3577 } 3578 PetscFunctionReturn(0); 3579 } 3580