1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 20 21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 24 PetscErrorCode ierr; 25 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 26 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 27 PetscReal shift = 0.0; 28 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 29 30 PetscFunctionBegin; 31 allowzeropivot = PetscNot(A->erroriffailure); 32 33 if (a->idiagvalid) { 34 if (values) *values = a->idiag; 35 PetscFunctionReturn(0); 36 } 37 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 38 diag_offset = a->diag; 39 if (!a->idiag) { 40 ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr); 41 ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 42 } 43 diag = a->idiag; 44 if (values) *values = a->idiag; 45 /* factor and invert each block */ 46 switch (bs) { 47 case 1: 48 for (i=0; i<mbs; i++) { 49 odiag = v + 1*diag_offset[i]; 50 diag[0] = odiag[0]; 51 52 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 53 if (allowzeropivot) { 54 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 55 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 56 A->factorerror_zeropivot_row = i; 57 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 58 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 59 } 60 61 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 62 diag += 1; 63 } 64 break; 65 case 2: 66 for (i=0; i<mbs; i++) { 67 odiag = v + 4*diag_offset[i]; 68 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 } 73 break; 74 case 3: 75 for (i=0; i<mbs; i++) { 76 odiag = v + 9*diag_offset[i]; 77 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 78 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 79 diag[8] = odiag[8]; 80 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 81 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 82 diag += 9; 83 } 84 break; 85 case 4: 86 for (i=0; i<mbs; i++) { 87 odiag = v + 16*diag_offset[i]; 88 ierr = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 } 93 break; 94 case 5: 95 for (i=0; i<mbs; i++) { 96 odiag = v + 25*diag_offset[i]; 97 ierr = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr); 98 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 99 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 100 diag += 25; 101 } 102 break; 103 case 6: 104 for (i=0; i<mbs; i++) { 105 odiag = v + 36*diag_offset[i]; 106 ierr = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr); 107 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 108 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 109 diag += 36; 110 } 111 break; 112 case 7: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 49*diag_offset[i]; 115 ierr = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr); 116 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 117 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 diag += 49; 119 } 120 break; 121 default: 122 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 123 for (i=0; i<mbs; i++) { 124 odiag = v + bs2*diag_offset[i]; 125 ierr = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr); 126 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 127 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 diag += bs2; 129 } 130 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 131 } 132 a->idiagvalid = PETSC_TRUE; 133 PetscFunctionReturn(0); 134 } 135 136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 137 { 138 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 139 PetscScalar *x,*work,*w,*workt,*t; 140 const MatScalar *v,*aa = a->a, *idiag; 141 const PetscScalar *b,*xb; 142 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 143 PetscErrorCode ierr; 144 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 145 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 146 147 PetscFunctionBegin; 148 its = its*lits; 149 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 150 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 151 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 152 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 153 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 154 155 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 156 157 if (!m) PetscFunctionReturn(0); 158 diag = a->diag; 159 idiag = a->idiag; 160 k = PetscMax(A->rmap->n,A->cmap->n); 161 if (!a->mult_work) { 162 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 163 } 164 if (!a->sor_workt) { 165 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 166 } 167 if (!a->sor_work) { 168 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 169 } 170 work = a->mult_work; 171 t = a->sor_workt; 172 w = a->sor_work; 173 174 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 175 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 176 177 if (flag & SOR_ZERO_INITIAL_GUESS) { 178 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 179 switch (bs) { 180 case 1: 181 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 182 t[0] = b[0]; 183 i2 = 1; 184 idiag += 1; 185 for (i=1; i<m; i++) { 186 v = aa + ai[i]; 187 vi = aj + ai[i]; 188 nz = diag[i] - ai[i]; 189 s[0] = b[i2]; 190 for (j=0; j<nz; j++) { 191 xw[0] = x[vi[j]]; 192 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 193 } 194 t[i2] = s[0]; 195 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 196 x[i2] = xw[0]; 197 idiag += 1; 198 i2 += 1; 199 } 200 break; 201 case 2: 202 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 203 t[0] = b[0]; t[1] = b[1]; 204 i2 = 2; 205 idiag += 4; 206 for (i=1; i<m; i++) { 207 v = aa + 4*ai[i]; 208 vi = aj + ai[i]; 209 nz = diag[i] - ai[i]; 210 s[0] = b[i2]; s[1] = b[i2+1]; 211 for (j=0; j<nz; j++) { 212 idx = 2*vi[j]; 213 it = 4*j; 214 xw[0] = x[idx]; xw[1] = x[1+idx]; 215 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 216 } 217 t[i2] = s[0]; t[i2+1] = s[1]; 218 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 219 x[i2] = xw[0]; x[i2+1] = xw[1]; 220 idiag += 4; 221 i2 += 2; 222 } 223 break; 224 case 3: 225 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 226 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 227 i2 = 3; 228 idiag += 9; 229 for (i=1; i<m; i++) { 230 v = aa + 9*ai[i]; 231 vi = aj + ai[i]; 232 nz = diag[i] - ai[i]; 233 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 234 while (nz--) { 235 idx = 3*(*vi++); 236 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 237 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 238 v += 9; 239 } 240 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 241 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 242 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 243 idiag += 9; 244 i2 += 3; 245 } 246 break; 247 case 4: 248 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 249 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 250 i2 = 4; 251 idiag += 16; 252 for (i=1; i<m; i++) { 253 v = aa + 16*ai[i]; 254 vi = aj + ai[i]; 255 nz = diag[i] - ai[i]; 256 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 257 while (nz--) { 258 idx = 4*(*vi++); 259 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 260 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 261 v += 16; 262 } 263 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 264 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 265 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 266 idiag += 16; 267 i2 += 4; 268 } 269 break; 270 case 5: 271 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 272 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 273 i2 = 5; 274 idiag += 25; 275 for (i=1; i<m; i++) { 276 v = aa + 25*ai[i]; 277 vi = aj + ai[i]; 278 nz = diag[i] - ai[i]; 279 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 280 while (nz--) { 281 idx = 5*(*vi++); 282 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 283 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 284 v += 25; 285 } 286 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 287 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 288 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 289 idiag += 25; 290 i2 += 5; 291 } 292 break; 293 case 6: 294 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 295 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 296 i2 = 6; 297 idiag += 36; 298 for (i=1; i<m; i++) { 299 v = aa + 36*ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 303 while (nz--) { 304 idx = 6*(*vi++); 305 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 306 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 307 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 308 v += 36; 309 } 310 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 311 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 312 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 313 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 314 idiag += 36; 315 i2 += 6; 316 } 317 break; 318 case 7: 319 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 320 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 321 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 322 i2 = 7; 323 idiag += 49; 324 for (i=1; i<m; i++) { 325 v = aa + 49*ai[i]; 326 vi = aj + ai[i]; 327 nz = diag[i] - ai[i]; 328 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 329 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 330 while (nz--) { 331 idx = 7*(*vi++); 332 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 333 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 334 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 335 v += 49; 336 } 337 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 338 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 339 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 340 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 341 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 342 idiag += 49; 343 i2 += 7; 344 } 345 break; 346 default: 347 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 348 ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr); 349 i2 = bs; 350 idiag += bs2; 351 for (i=1; i<m; i++) { 352 v = aa + bs2*ai[i]; 353 vi = aj + ai[i]; 354 nz = diag[i] - ai[i]; 355 356 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 357 /* copy all rows of x that are needed into contiguous space */ 358 workt = work; 359 for (j=0; j<nz; j++) { 360 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 361 workt += bs; 362 } 363 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 364 ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr); 365 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 366 367 idiag += bs2; 368 i2 += bs; 369 } 370 break; 371 } 372 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 373 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 374 xb = t; 375 } 376 else xb = b; 377 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 378 idiag = a->idiag+bs2*(a->mbs-1); 379 i2 = bs * (m-1); 380 switch (bs) { 381 case 1: 382 s[0] = xb[i2]; 383 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 384 x[i2] = xw[0]; 385 i2 -= 1; 386 for (i=m-2; i>=0; i--) { 387 v = aa + (diag[i]+1); 388 vi = aj + diag[i] + 1; 389 nz = ai[i+1] - diag[i] - 1; 390 s[0] = xb[i2]; 391 for (j=0; j<nz; j++) { 392 xw[0] = x[vi[j]]; 393 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 394 } 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 idiag -= 1; 398 i2 -= 1; 399 } 400 break; 401 case 2: 402 s[0] = xb[i2]; s[1] = xb[i2+1]; 403 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 404 x[i2] = xw[0]; x[i2+1] = xw[1]; 405 i2 -= 2; 406 idiag -= 4; 407 for (i=m-2; i>=0; i--) { 408 v = aa + 4*(diag[i] + 1); 409 vi = aj + diag[i] + 1; 410 nz = ai[i+1] - diag[i] - 1; 411 s[0] = xb[i2]; s[1] = xb[i2+1]; 412 for (j=0; j<nz; j++) { 413 idx = 2*vi[j]; 414 it = 4*j; 415 xw[0] = x[idx]; xw[1] = x[1+idx]; 416 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 417 } 418 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 419 x[i2] = xw[0]; x[i2+1] = xw[1]; 420 idiag -= 4; 421 i2 -= 2; 422 } 423 break; 424 case 3: 425 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 426 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 427 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 428 i2 -= 3; 429 idiag -= 9; 430 for (i=m-2; i>=0; i--) { 431 v = aa + 9*(diag[i]+1); 432 vi = aj + diag[i] + 1; 433 nz = ai[i+1] - diag[i] - 1; 434 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 435 while (nz--) { 436 idx = 3*(*vi++); 437 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 438 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 439 v += 9; 440 } 441 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 442 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 443 idiag -= 9; 444 i2 -= 3; 445 } 446 break; 447 case 4: 448 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 449 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 450 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 451 i2 -= 4; 452 idiag -= 16; 453 for (i=m-2; i>=0; i--) { 454 v = aa + 16*(diag[i]+1); 455 vi = aj + diag[i] + 1; 456 nz = ai[i+1] - diag[i] - 1; 457 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 458 while (nz--) { 459 idx = 4*(*vi++); 460 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 461 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 462 v += 16; 463 } 464 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 465 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 466 idiag -= 16; 467 i2 -= 4; 468 } 469 break; 470 case 5: 471 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 472 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 473 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 474 i2 -= 5; 475 idiag -= 25; 476 for (i=m-2; i>=0; i--) { 477 v = aa + 25*(diag[i]+1); 478 vi = aj + diag[i] + 1; 479 nz = ai[i+1] - diag[i] - 1; 480 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 481 while (nz--) { 482 idx = 5*(*vi++); 483 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 485 v += 25; 486 } 487 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 488 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 489 idiag -= 25; 490 i2 -= 5; 491 } 492 break; 493 case 6: 494 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 495 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 496 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 497 i2 -= 6; 498 idiag -= 36; 499 for (i=m-2; i>=0; i--) { 500 v = aa + 36*(diag[i]+1); 501 vi = aj + diag[i] + 1; 502 nz = ai[i+1] - diag[i] - 1; 503 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 504 while (nz--) { 505 idx = 6*(*vi++); 506 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 507 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 508 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 509 v += 36; 510 } 511 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 513 idiag -= 36; 514 i2 -= 6; 515 } 516 break; 517 case 7: 518 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 519 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 520 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 521 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 522 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 523 i2 -= 7; 524 idiag -= 49; 525 for (i=m-2; i>=0; i--) { 526 v = aa + 49*(diag[i]+1); 527 vi = aj + diag[i] + 1; 528 nz = ai[i+1] - diag[i] - 1; 529 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 530 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 531 while (nz--) { 532 idx = 7*(*vi++); 533 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 534 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 535 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 536 v += 49; 537 } 538 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 idiag -= 49; 542 i2 -= 7; 543 } 544 break; 545 default: 546 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 547 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 548 i2 -= bs; 549 idiag -= bs2; 550 for (i=m-2; i>=0; i--) { 551 v = aa + bs2*(diag[i]+1); 552 vi = aj + diag[i] + 1; 553 nz = ai[i+1] - diag[i] - 1; 554 555 ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr); 556 /* copy all rows of x that are needed into contiguous space */ 557 workt = work; 558 for (j=0; j<nz; j++) { 559 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 560 workt += bs; 561 } 562 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 563 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 564 565 idiag -= bs2; 566 i2 -= bs; 567 } 568 break; 569 } 570 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 571 } 572 its--; 573 } 574 while (its--) { 575 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 576 idiag = a->idiag; 577 i2 = 0; 578 switch (bs) { 579 case 1: 580 for (i=0; i<m; i++) { 581 v = aa + ai[i]; 582 vi = aj + ai[i]; 583 nz = ai[i+1] - ai[i]; 584 s[0] = b[i2]; 585 for (j=0; j<nz; j++) { 586 xw[0] = x[vi[j]]; 587 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 588 } 589 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 590 x[i2] += xw[0]; 591 idiag += 1; 592 i2 += 1; 593 } 594 break; 595 case 2: 596 for (i=0; i<m; i++) { 597 v = aa + 4*ai[i]; 598 vi = aj + ai[i]; 599 nz = ai[i+1] - ai[i]; 600 s[0] = b[i2]; s[1] = b[i2+1]; 601 for (j=0; j<nz; j++) { 602 idx = 2*vi[j]; 603 it = 4*j; 604 xw[0] = x[idx]; xw[1] = x[1+idx]; 605 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 606 } 607 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 608 x[i2] += xw[0]; x[i2+1] += xw[1]; 609 idiag += 4; 610 i2 += 2; 611 } 612 break; 613 case 3: 614 for (i=0; i<m; i++) { 615 v = aa + 9*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 619 while (nz--) { 620 idx = 3*(*vi++); 621 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 627 idiag += 9; 628 i2 += 3; 629 } 630 break; 631 case 4: 632 for (i=0; i<m; i++) { 633 v = aa + 16*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 637 while (nz--) { 638 idx = 4*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 641 v += 16; 642 } 643 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 645 idiag += 16; 646 i2 += 4; 647 } 648 break; 649 case 5: 650 for (i=0; i<m; i++) { 651 v = aa + 25*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 655 while (nz--) { 656 idx = 5*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 659 v += 25; 660 } 661 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 663 idiag += 25; 664 i2 += 5; 665 } 666 break; 667 case 6: 668 for (i=0; i<m; i++) { 669 v = aa + 36*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 673 while (nz--) { 674 idx = 6*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 676 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 677 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 678 v += 36; 679 } 680 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 681 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 682 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 683 idiag += 36; 684 i2 += 6; 685 } 686 break; 687 case 7: 688 for (i=0; i<m; i++) { 689 v = aa + 49*ai[i]; 690 vi = aj + ai[i]; 691 nz = ai[i+1] - ai[i]; 692 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 693 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 694 while (nz--) { 695 idx = 7*(*vi++); 696 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 697 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 698 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 699 v += 49; 700 } 701 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 702 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 703 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 704 idiag += 49; 705 i2 += 7; 706 } 707 break; 708 default: 709 for (i=0; i<m; i++) { 710 v = aa + bs2*ai[i]; 711 vi = aj + ai[i]; 712 nz = ai[i+1] - ai[i]; 713 714 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 715 /* copy all rows of x that are needed into contiguous space */ 716 workt = work; 717 for (j=0; j<nz; j++) { 718 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 719 workt += bs; 720 } 721 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 722 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 723 724 idiag += bs2; 725 i2 += bs; 726 } 727 break; 728 } 729 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 730 } 731 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 732 idiag = a->idiag+bs2*(a->mbs-1); 733 i2 = bs * (m-1); 734 switch (bs) { 735 case 1: 736 for (i=m-1; i>=0; i--) { 737 v = aa + ai[i]; 738 vi = aj + ai[i]; 739 nz = ai[i+1] - ai[i]; 740 s[0] = b[i2]; 741 for (j=0; j<nz; j++) { 742 xw[0] = x[vi[j]]; 743 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 744 } 745 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 746 x[i2] += xw[0]; 747 idiag -= 1; 748 i2 -= 1; 749 } 750 break; 751 case 2: 752 for (i=m-1; i>=0; i--) { 753 v = aa + 4*ai[i]; 754 vi = aj + ai[i]; 755 nz = ai[i+1] - ai[i]; 756 s[0] = b[i2]; s[1] = b[i2+1]; 757 for (j=0; j<nz; j++) { 758 idx = 2*vi[j]; 759 it = 4*j; 760 xw[0] = x[idx]; xw[1] = x[1+idx]; 761 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 762 } 763 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; 765 idiag -= 4; 766 i2 -= 2; 767 } 768 break; 769 case 3: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 9*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 775 while (nz--) { 776 idx = 3*(*vi++); 777 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 778 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 779 v += 9; 780 } 781 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 783 idiag -= 9; 784 i2 -= 3; 785 } 786 break; 787 case 4: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 16*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 793 while (nz--) { 794 idx = 4*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 797 v += 16; 798 } 799 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 801 idiag -= 16; 802 i2 -= 4; 803 } 804 break; 805 case 5: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 25*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 811 while (nz--) { 812 idx = 5*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 815 v += 25; 816 } 817 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 819 idiag -= 25; 820 i2 -= 5; 821 } 822 break; 823 case 6: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 36*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 829 while (nz--) { 830 idx = 6*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 832 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 833 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 834 v += 36; 835 } 836 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 837 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 838 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 839 idiag -= 36; 840 i2 -= 6; 841 } 842 break; 843 case 7: 844 for (i=m-1; i>=0; i--) { 845 v = aa + 49*ai[i]; 846 vi = aj + ai[i]; 847 nz = ai[i+1] - ai[i]; 848 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 849 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 850 while (nz--) { 851 idx = 7*(*vi++); 852 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 853 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 854 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 855 v += 49; 856 } 857 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 858 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 859 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 860 idiag -= 49; 861 i2 -= 7; 862 } 863 break; 864 default: 865 for (i=m-1; i>=0; i--) { 866 v = aa + bs2*ai[i]; 867 vi = aj + ai[i]; 868 nz = ai[i+1] - ai[i]; 869 870 ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr); 871 /* copy all rows of x that are needed into contiguous space */ 872 workt = work; 873 for (j=0; j<nz; j++) { 874 ierr = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr); 875 workt += bs; 876 } 877 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 878 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 879 880 idiag -= bs2; 881 i2 -= bs; 882 } 883 break; 884 } 885 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 886 } 887 } 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 894 /* 895 Special version for direct calls from Fortran (Used in PETSc-fun3d) 896 */ 897 #if defined(PETSC_HAVE_FORTRAN_CAPS) 898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 900 #define matsetvaluesblocked4_ matsetvaluesblocked4 901 #endif 902 903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 904 { 905 Mat A = *AA; 906 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 907 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 908 PetscInt *ai =a->i,*ailen=a->ilen; 909 PetscInt *aj =a->j,stepval,lastcol = -1; 910 const PetscScalar *value = v; 911 MatScalar *ap,*aa = a->a,*bap; 912 PetscErrorCode ierr; 913 914 PetscFunctionBegin; 915 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 916 stepval = (n-1)*4; 917 for (k=0; k<m; k++) { /* loop over added rows */ 918 row = im[k]; 919 rp = aj + ai[row]; 920 ap = aa + 16*ai[row]; 921 nrow = ailen[row]; 922 low = 0; 923 high = nrow; 924 for (l=0; l<n; l++) { /* loop over added columns */ 925 col = in[l]; 926 if (col <= lastcol) low = 0; 927 else high = nrow; 928 lastcol = col; 929 value = v + k*(stepval+4 + l)*4; 930 while (high-low > 7) { 931 t = (low+high)/2; 932 if (rp[t] > col) high = t; 933 else low = t; 934 } 935 for (i=low; i<high; i++) { 936 if (rp[i] > col) break; 937 if (rp[i] == col) { 938 bap = ap + 16*i; 939 for (ii=0; ii<4; ii++,value+=stepval) { 940 for (jj=ii; jj<16; jj+=4) { 941 bap[jj] += *value++; 942 } 943 } 944 goto noinsert2; 945 } 946 } 947 N = nrow++ - 1; 948 high++; /* added new column index thus must search to one higher than before */ 949 /* shift up all the later entries in this row */ 950 for (ii=N; ii>=i; ii--) { 951 rp[ii+1] = rp[ii]; 952 ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr); 953 } 954 if (N >= i) { 955 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 956 } 957 rp[i] = col; 958 bap = ap + 16*i; 959 for (ii=0; ii<4; ii++,value+=stepval) { 960 for (jj=ii; jj<16; jj+=4) { 961 bap[jj] = *value++; 962 } 963 } 964 noinsert2:; 965 low = i; 966 } 967 ailen[row] = nrow; 968 } 969 PetscFunctionReturnVoid(); 970 } 971 972 #if defined(PETSC_HAVE_FORTRAN_CAPS) 973 #define matsetvalues4_ MATSETVALUES4 974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 975 #define matsetvalues4_ matsetvalues4 976 #endif 977 978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 979 { 980 Mat A = *AA; 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 982 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 983 PetscInt *ai=a->i,*ailen=a->ilen; 984 PetscInt *aj=a->j,brow,bcol; 985 PetscInt ridx,cidx,lastcol = -1; 986 MatScalar *ap,value,*aa=a->a,*bap; 987 PetscErrorCode ierr; 988 989 PetscFunctionBegin; 990 for (k=0; k<m; k++) { /* loop over added rows */ 991 row = im[k]; brow = row/4; 992 rp = aj + ai[brow]; 993 ap = aa + 16*ai[brow]; 994 nrow = ailen[brow]; 995 low = 0; 996 high = nrow; 997 for (l=0; l<n; l++) { /* loop over added columns */ 998 col = in[l]; bcol = col/4; 999 ridx = row % 4; cidx = col % 4; 1000 value = v[l + k*n]; 1001 if (col <= lastcol) low = 0; 1002 else high = nrow; 1003 lastcol = col; 1004 while (high-low > 7) { 1005 t = (low+high)/2; 1006 if (rp[t] > bcol) high = t; 1007 else low = t; 1008 } 1009 for (i=low; i<high; i++) { 1010 if (rp[i] > bcol) break; 1011 if (rp[i] == bcol) { 1012 bap = ap + 16*i + 4*cidx + ridx; 1013 *bap += value; 1014 goto noinsert1; 1015 } 1016 } 1017 N = nrow++ - 1; 1018 high++; /* added new column thus must search to one higher than before */ 1019 /* shift up all the later entries in this row */ 1020 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr); 1021 ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr); 1022 ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr); 1023 rp[i] = bcol; 1024 ap[16*i + 4*cidx + ridx] = value; 1025 noinsert1:; 1026 low = i; 1027 } 1028 ailen[brow] = nrow; 1029 } 1030 PetscFunctionReturnVoid(); 1031 } 1032 1033 /* 1034 Checks for missing diagonals 1035 */ 1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1039 PetscErrorCode ierr; 1040 PetscInt *diag,*ii = a->i,i; 1041 1042 PetscFunctionBegin; 1043 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1044 *missing = PETSC_FALSE; 1045 if (A->rmap->n > 0 && !ii) { 1046 *missing = PETSC_TRUE; 1047 if (d) *d = 0; 1048 ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr); 1049 } else { 1050 PetscInt n; 1051 n = PetscMin(a->mbs, a->nbs); 1052 diag = a->diag; 1053 for (i=0; i<n; i++) { 1054 if (diag[i] >= ii[i+1]) { 1055 *missing = PETSC_TRUE; 1056 if (d) *d = i; 1057 ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr); 1058 break; 1059 } 1060 } 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1068 PetscErrorCode ierr; 1069 PetscInt i,j,m = a->mbs; 1070 1071 PetscFunctionBegin; 1072 if (!a->diag) { 1073 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1074 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1075 a->free_diag = PETSC_TRUE; 1076 } 1077 for (i=0; i<m; i++) { 1078 a->diag[i] = a->i[i+1]; 1079 for (j=a->i[i]; j<a->i[i+1]; j++) { 1080 if (a->j[j] == i) { 1081 a->diag[i] = j; 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1091 { 1092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1093 PetscErrorCode ierr; 1094 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1095 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1096 1097 PetscFunctionBegin; 1098 *nn = n; 1099 if (!ia) PetscFunctionReturn(0); 1100 if (symmetric) { 1101 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1102 nz = tia[n]; 1103 } else { 1104 tia = a->i; tja = a->j; 1105 } 1106 1107 if (!blockcompressed && bs > 1) { 1108 (*nn) *= bs; 1109 /* malloc & create the natural set of indices */ 1110 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1111 if (n) { 1112 (*ia)[0] = oshift; 1113 for (j=1; j<bs; j++) { 1114 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1115 } 1116 } 1117 1118 for (i=1; i<n; i++) { 1119 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1120 for (j=1; j<bs; j++) { 1121 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1122 } 1123 } 1124 if (n) { 1125 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1126 } 1127 1128 if (inja) { 1129 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1130 cnt = 0; 1131 for (i=0; i<n; i++) { 1132 for (j=0; j<bs; j++) { 1133 for (k=tia[i]; k<tia[i+1]; k++) { 1134 for (l=0; l<bs; l++) { 1135 (*ja)[cnt++] = bs*tja[k] + l; 1136 } 1137 } 1138 } 1139 } 1140 } 1141 1142 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1143 ierr = PetscFree(tia);CHKERRQ(ierr); 1144 ierr = PetscFree(tja);CHKERRQ(ierr); 1145 } 1146 } else if (oshift == 1) { 1147 if (symmetric) { 1148 nz = tia[A->rmap->n/bs]; 1149 /* add 1 to i and j indices */ 1150 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1151 *ia = tia; 1152 if (ja) { 1153 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1154 *ja = tja; 1155 } 1156 } else { 1157 nz = a->i[A->rmap->n/bs]; 1158 /* malloc space and add 1 to i and j indices */ 1159 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1160 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1161 if (ja) { 1162 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1163 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1164 } 1165 } 1166 } else { 1167 *ia = tia; 1168 if (ja) *ja = tja; 1169 } 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1174 { 1175 PetscErrorCode ierr; 1176 1177 PetscFunctionBegin; 1178 if (!ia) PetscFunctionReturn(0); 1179 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1180 ierr = PetscFree(*ia);CHKERRQ(ierr); 1181 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1187 { 1188 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1189 PetscErrorCode ierr; 1190 1191 PetscFunctionBegin; 1192 #if defined(PETSC_USE_LOG) 1193 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1194 #endif 1195 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1196 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1197 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1198 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1199 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1200 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1201 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1202 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1203 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1204 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1205 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1206 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1207 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1208 1209 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1210 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1211 ierr = PetscFree(A->data);CHKERRQ(ierr); 1212 1213 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr); 1215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr); 1216 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1217 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1218 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1219 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1220 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1221 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1222 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1223 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1224 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1225 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1226 #if defined(PETSC_HAVE_HYPRE) 1227 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr); 1228 #endif 1229 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr); 1230 ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1235 { 1236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1237 PetscErrorCode ierr; 1238 1239 PetscFunctionBegin; 1240 switch (op) { 1241 case MAT_ROW_ORIENTED: 1242 a->roworiented = flg; 1243 break; 1244 case MAT_KEEP_NONZERO_PATTERN: 1245 a->keepnonzeropattern = flg; 1246 break; 1247 case MAT_NEW_NONZERO_LOCATIONS: 1248 a->nonew = (flg ? 0 : 1); 1249 break; 1250 case MAT_NEW_NONZERO_LOCATION_ERR: 1251 a->nonew = (flg ? -1 : 0); 1252 break; 1253 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1254 a->nonew = (flg ? -2 : 0); 1255 break; 1256 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1257 a->nounused = (flg ? -1 : 0); 1258 break; 1259 case MAT_NEW_DIAGONALS: 1260 case MAT_IGNORE_OFF_PROC_ENTRIES: 1261 case MAT_USE_HASH_TABLE: 1262 case MAT_SORTED_FULL: 1263 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1264 break; 1265 case MAT_SPD: 1266 case MAT_SYMMETRIC: 1267 case MAT_STRUCTURALLY_SYMMETRIC: 1268 case MAT_HERMITIAN: 1269 case MAT_SYMMETRY_ETERNAL: 1270 case MAT_SUBMAT_SINGLEIS: 1271 case MAT_STRUCTURE_ONLY: 1272 /* These options are handled directly by MatSetOption() */ 1273 break; 1274 default: 1275 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1281 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1282 { 1283 PetscErrorCode ierr; 1284 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1285 MatScalar *aa_i; 1286 PetscScalar *v_i; 1287 1288 PetscFunctionBegin; 1289 bs = A->rmap->bs; 1290 bs2 = bs*bs; 1291 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1292 1293 bn = row/bs; /* Block number */ 1294 bp = row % bs; /* Block Position */ 1295 M = ai[bn+1] - ai[bn]; 1296 *nz = bs*M; 1297 1298 if (v) { 1299 *v = 0; 1300 if (*nz) { 1301 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1302 for (i=0; i<M; i++) { /* for each block in the block row */ 1303 v_i = *v + i*bs; 1304 aa_i = aa + bs2*(ai[bn] + i); 1305 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1306 } 1307 } 1308 } 1309 1310 if (idx) { 1311 *idx = 0; 1312 if (*nz) { 1313 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1314 for (i=0; i<M; i++) { /* for each block in the block row */ 1315 idx_i = *idx + i*bs; 1316 itmp = bs*aj[ai[bn] + i]; 1317 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1318 } 1319 } 1320 } 1321 PetscFunctionReturn(0); 1322 } 1323 1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1325 { 1326 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1327 PetscErrorCode ierr; 1328 1329 PetscFunctionBegin; 1330 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1331 PetscFunctionReturn(0); 1332 } 1333 1334 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1335 { 1336 PetscErrorCode ierr; 1337 1338 PetscFunctionBegin; 1339 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1340 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1341 PetscFunctionReturn(0); 1342 } 1343 1344 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1345 { 1346 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1347 Mat C; 1348 PetscErrorCode ierr; 1349 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1350 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1351 MatScalar *ata,*aa=a->a; 1352 1353 PetscFunctionBegin; 1354 ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr); 1355 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1356 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1357 1358 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1359 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1360 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1361 ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr); 1362 1363 at = (Mat_SeqBAIJ*)C->data; 1364 ati = at->i; 1365 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1366 } else { 1367 C = *B; 1368 at = (Mat_SeqBAIJ*)C->data; 1369 ati = at->i; 1370 } 1371 1372 atj = at->j; 1373 ata = at->a; 1374 1375 /* Copy ati into atfill so we have locations of the next free space in atj */ 1376 ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr); 1377 1378 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1379 for (i=0; i<mbs; i++) { 1380 anzj = ai[i+1] - ai[i]; 1381 for (j=0; j<anzj; j++) { 1382 atj[atfill[*aj]] = i; 1383 for (kr=0; kr<bs; kr++) { 1384 for (k=0; k<bs; k++) { 1385 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1386 } 1387 } 1388 atfill[*aj++] += 1; 1389 } 1390 } 1391 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1392 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 1394 /* Clean up temporary space and complete requests. */ 1395 ierr = PetscFree(atfill);CHKERRQ(ierr); 1396 1397 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1398 ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1399 *B = C; 1400 } else { 1401 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1402 } 1403 PetscFunctionReturn(0); 1404 } 1405 1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1407 { 1408 PetscErrorCode ierr; 1409 Mat Btrans; 1410 1411 PetscFunctionBegin; 1412 *f = PETSC_FALSE; 1413 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1414 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1415 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1420 { 1421 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1422 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1423 PetscInt *rowlens,*colidxs; 1424 PetscScalar *matvals; 1425 PetscErrorCode ierr; 1426 1427 PetscFunctionBegin; 1428 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1429 1430 M = mat->rmap->N; 1431 N = mat->cmap->N; 1432 m = mat->rmap->n; 1433 bs = mat->rmap->bs; 1434 nz = bs*bs*A->nz; 1435 1436 /* write matrix header */ 1437 header[0] = MAT_FILE_CLASSID; 1438 header[1] = M; header[2] = N; header[3] = nz; 1439 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1440 1441 /* store row lengths */ 1442 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1443 for (cnt=0, i=0; i<A->mbs; i++) 1444 for (j=0; j<bs; j++) 1445 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1446 ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr); 1447 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1448 1449 /* store column indices */ 1450 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1451 for (cnt=0, i=0; i<A->mbs; i++) 1452 for (k=0; k<bs; k++) 1453 for (j=A->i[i]; j<A->i[i+1]; j++) 1454 for (l=0; l<bs; l++) 1455 colidxs[cnt++] = bs*A->j[j] + l; 1456 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1457 ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr); 1458 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1459 1460 /* store nonzero values */ 1461 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1462 for (cnt=0, i=0; i<A->mbs; i++) 1463 for (k=0; k<bs; k++) 1464 for (j=A->i[i]; j<A->i[i+1]; j++) 1465 for (l=0; l<bs; l++) 1466 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1467 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1468 ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr); 1469 ierr = PetscFree(matvals);CHKERRQ(ierr); 1470 1471 /* write block size option to the viewer's .info file */ 1472 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } 1475 1476 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1477 { 1478 PetscErrorCode ierr; 1479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1480 PetscInt i,bs = A->rmap->bs,k; 1481 1482 PetscFunctionBegin; 1483 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1484 for (i=0; i<a->mbs; i++) { 1485 ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr); 1486 for (k=a->i[i]; k<a->i[i+1]; k++) { 1487 ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr); 1488 } 1489 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1490 } 1491 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1492 PetscFunctionReturn(0); 1493 } 1494 1495 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1496 { 1497 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1498 PetscErrorCode ierr; 1499 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1500 PetscViewerFormat format; 1501 1502 PetscFunctionBegin; 1503 if (A->structure_only) { 1504 ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr); 1505 PetscFunctionReturn(0); 1506 } 1507 1508 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1509 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1510 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1511 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1512 const char *matname; 1513 Mat aij; 1514 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1515 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1516 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1517 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1518 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1519 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1520 PetscFunctionReturn(0); 1521 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1522 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1523 for (i=0; i<a->mbs; i++) { 1524 for (j=0; j<bs; j++) { 1525 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1526 for (k=a->i[i]; k<a->i[i+1]; k++) { 1527 for (l=0; l<bs; l++) { 1528 #if defined(PETSC_USE_COMPLEX) 1529 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1530 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1531 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1532 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1533 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1534 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1535 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1537 } 1538 #else 1539 if (a->a[bs2*k + l*bs + j] != 0.0) { 1540 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1541 } 1542 #endif 1543 } 1544 } 1545 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1546 } 1547 } 1548 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1549 } else { 1550 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1551 for (i=0; i<a->mbs; i++) { 1552 for (j=0; j<bs; j++) { 1553 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1554 for (k=a->i[i]; k<a->i[i+1]; k++) { 1555 for (l=0; l<bs; l++) { 1556 #if defined(PETSC_USE_COMPLEX) 1557 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1558 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1559 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1560 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1561 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1562 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1563 } else { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1565 } 1566 #else 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1568 #endif 1569 } 1570 } 1571 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1572 } 1573 } 1574 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1575 } 1576 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 #include <petscdraw.h> 1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1582 { 1583 Mat A = (Mat) Aa; 1584 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1585 PetscErrorCode ierr; 1586 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1587 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1588 MatScalar *aa; 1589 PetscViewer viewer; 1590 PetscViewerFormat format; 1591 1592 PetscFunctionBegin; 1593 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1594 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1595 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1596 1597 /* loop over matrix elements drawing boxes */ 1598 1599 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1600 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1601 /* Blue for negative, Cyan for zero and Red for positive */ 1602 color = PETSC_DRAW_BLUE; 1603 for (i=0,row=0; i<mbs; i++,row+=bs) { 1604 for (j=a->i[i]; j<a->i[i+1]; j++) { 1605 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1606 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1607 aa = a->a + j*bs2; 1608 for (k=0; k<bs; k++) { 1609 for (l=0; l<bs; l++) { 1610 if (PetscRealPart(*aa++) >= 0.) continue; 1611 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1612 } 1613 } 1614 } 1615 } 1616 color = PETSC_DRAW_CYAN; 1617 for (i=0,row=0; i<mbs; i++,row+=bs) { 1618 for (j=a->i[i]; j<a->i[i+1]; j++) { 1619 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1620 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1621 aa = a->a + j*bs2; 1622 for (k=0; k<bs; k++) { 1623 for (l=0; l<bs; l++) { 1624 if (PetscRealPart(*aa++) != 0.) continue; 1625 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1626 } 1627 } 1628 } 1629 } 1630 color = PETSC_DRAW_RED; 1631 for (i=0,row=0; i<mbs; i++,row+=bs) { 1632 for (j=a->i[i]; j<a->i[i+1]; j++) { 1633 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1634 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1635 aa = a->a + j*bs2; 1636 for (k=0; k<bs; k++) { 1637 for (l=0; l<bs; l++) { 1638 if (PetscRealPart(*aa++) <= 0.) continue; 1639 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1640 } 1641 } 1642 } 1643 } 1644 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1645 } else { 1646 /* use contour shading to indicate magnitude of values */ 1647 /* first determine max of all nonzero values */ 1648 PetscReal minv = 0.0, maxv = 0.0; 1649 PetscDraw popup; 1650 1651 for (i=0; i<a->nz*a->bs2; i++) { 1652 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1653 } 1654 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1655 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1656 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1657 1658 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1659 for (i=0,row=0; i<mbs; i++,row+=bs) { 1660 for (j=a->i[i]; j<a->i[i+1]; j++) { 1661 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1663 aa = a->a + j*bs2; 1664 for (k=0; k<bs; k++) { 1665 for (l=0; l<bs; l++) { 1666 MatScalar v = *aa++; 1667 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1668 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1669 } 1670 } 1671 } 1672 } 1673 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1674 } 1675 PetscFunctionReturn(0); 1676 } 1677 1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1679 { 1680 PetscErrorCode ierr; 1681 PetscReal xl,yl,xr,yr,w,h; 1682 PetscDraw draw; 1683 PetscBool isnull; 1684 1685 PetscFunctionBegin; 1686 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1687 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1688 if (isnull) PetscFunctionReturn(0); 1689 1690 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1691 xr += w; yr += h; xl = -w; yl = -h; 1692 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1693 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1694 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1695 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1696 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1697 PetscFunctionReturn(0); 1698 } 1699 1700 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1701 { 1702 PetscErrorCode ierr; 1703 PetscBool iascii,isbinary,isdraw; 1704 1705 PetscFunctionBegin; 1706 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1707 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1708 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1709 if (iascii) { 1710 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1711 } else if (isbinary) { 1712 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1713 } else if (isdraw) { 1714 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1715 } else { 1716 Mat B; 1717 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1718 ierr = MatView(B,viewer);CHKERRQ(ierr); 1719 ierr = MatDestroy(&B);CHKERRQ(ierr); 1720 } 1721 PetscFunctionReturn(0); 1722 } 1723 1724 1725 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1726 { 1727 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1728 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1729 PetscInt *ai = a->i,*ailen = a->ilen; 1730 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1731 MatScalar *ap,*aa = a->a; 1732 1733 PetscFunctionBegin; 1734 for (k=0; k<m; k++) { /* loop over rows */ 1735 row = im[k]; brow = row/bs; 1736 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1737 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1738 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1739 nrow = ailen[brow]; 1740 for (l=0; l<n; l++) { /* loop over columns */ 1741 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1742 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1743 col = in[l]; 1744 bcol = col/bs; 1745 cidx = col%bs; 1746 ridx = row%bs; 1747 high = nrow; 1748 low = 0; /* assume unsorted */ 1749 while (high-low > 5) { 1750 t = (low+high)/2; 1751 if (rp[t] > bcol) high = t; 1752 else low = t; 1753 } 1754 for (i=low; i<high; i++) { 1755 if (rp[i] > bcol) break; 1756 if (rp[i] == bcol) { 1757 *v++ = ap[bs2*i+bs*cidx+ridx]; 1758 goto finished; 1759 } 1760 } 1761 *v++ = 0.0; 1762 finished:; 1763 } 1764 } 1765 PetscFunctionReturn(0); 1766 } 1767 1768 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1769 { 1770 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1771 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1772 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1773 PetscErrorCode ierr; 1774 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1775 PetscBool roworiented=a->roworiented; 1776 const PetscScalar *value = v; 1777 MatScalar *ap=NULL,*aa = a->a,*bap; 1778 1779 PetscFunctionBegin; 1780 if (roworiented) { 1781 stepval = (n-1)*bs; 1782 } else { 1783 stepval = (m-1)*bs; 1784 } 1785 for (k=0; k<m; k++) { /* loop over added rows */ 1786 row = im[k]; 1787 if (row < 0) continue; 1788 #if defined(PETSC_USE_DEBUG) 1789 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1790 #endif 1791 rp = aj + ai[row]; 1792 if (!A->structure_only) ap = aa + bs2*ai[row]; 1793 rmax = imax[row]; 1794 nrow = ailen[row]; 1795 low = 0; 1796 high = nrow; 1797 for (l=0; l<n; l++) { /* loop over added columns */ 1798 if (in[l] < 0) continue; 1799 #if defined(PETSC_USE_DEBUG) 1800 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1801 #endif 1802 col = in[l]; 1803 if (!A->structure_only) { 1804 if (roworiented) { 1805 value = v + (k*(stepval+bs) + l)*bs; 1806 } else { 1807 value = v + (l*(stepval+bs) + k)*bs; 1808 } 1809 } 1810 if (col <= lastcol) low = 0; 1811 else high = nrow; 1812 lastcol = col; 1813 while (high-low > 7) { 1814 t = (low+high)/2; 1815 if (rp[t] > col) high = t; 1816 else low = t; 1817 } 1818 for (i=low; i<high; i++) { 1819 if (rp[i] > col) break; 1820 if (rp[i] == col) { 1821 if (A->structure_only) goto noinsert2; 1822 bap = ap + bs2*i; 1823 if (roworiented) { 1824 if (is == ADD_VALUES) { 1825 for (ii=0; ii<bs; ii++,value+=stepval) { 1826 for (jj=ii; jj<bs2; jj+=bs) { 1827 bap[jj] += *value++; 1828 } 1829 } 1830 } else { 1831 for (ii=0; ii<bs; ii++,value+=stepval) { 1832 for (jj=ii; jj<bs2; jj+=bs) { 1833 bap[jj] = *value++; 1834 } 1835 } 1836 } 1837 } else { 1838 if (is == ADD_VALUES) { 1839 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1840 for (jj=0; jj<bs; jj++) { 1841 bap[jj] += value[jj]; 1842 } 1843 bap += bs; 1844 } 1845 } else { 1846 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1847 for (jj=0; jj<bs; jj++) { 1848 bap[jj] = value[jj]; 1849 } 1850 bap += bs; 1851 } 1852 } 1853 } 1854 goto noinsert2; 1855 } 1856 } 1857 if (nonew == 1) goto noinsert2; 1858 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1859 if (A->structure_only) { 1860 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1861 } else { 1862 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1863 } 1864 N = nrow++ - 1; high++; 1865 /* shift up all the later entries in this row */ 1866 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 1867 rp[i] = col; 1868 if (!A->structure_only) { 1869 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 1870 bap = ap + bs2*i; 1871 if (roworiented) { 1872 for (ii=0; ii<bs; ii++,value+=stepval) { 1873 for (jj=ii; jj<bs2; jj+=bs) { 1874 bap[jj] = *value++; 1875 } 1876 } 1877 } else { 1878 for (ii=0; ii<bs; ii++,value+=stepval) { 1879 for (jj=0; jj<bs; jj++) { 1880 *bap++ = *value++; 1881 } 1882 } 1883 } 1884 } 1885 noinsert2:; 1886 low = i; 1887 } 1888 ailen[row] = nrow; 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1894 { 1895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1896 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1897 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1898 PetscErrorCode ierr; 1899 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1900 MatScalar *aa = a->a,*ap; 1901 PetscReal ratio=0.6; 1902 1903 PetscFunctionBegin; 1904 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1905 1906 if (m) rmax = ailen[0]; 1907 for (i=1; i<mbs; i++) { 1908 /* move each row back by the amount of empty slots (fshift) before it*/ 1909 fshift += imax[i-1] - ailen[i-1]; 1910 rmax = PetscMax(rmax,ailen[i]); 1911 if (fshift) { 1912 ip = aj + ai[i]; 1913 ap = aa + bs2*ai[i]; 1914 N = ailen[i]; 1915 ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr); 1916 if (!A->structure_only) { 1917 ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr); 1918 } 1919 } 1920 ai[i] = ai[i-1] + ailen[i-1]; 1921 } 1922 if (mbs) { 1923 fshift += imax[mbs-1] - ailen[mbs-1]; 1924 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1925 } 1926 1927 /* reset ilen and imax for each row */ 1928 a->nonzerorowcnt = 0; 1929 if (A->structure_only) { 1930 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 1931 } else { /* !A->structure_only */ 1932 for (i=0; i<mbs; i++) { 1933 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1934 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1935 } 1936 } 1937 a->nz = ai[mbs]; 1938 1939 /* diagonals may have moved, so kill the diagonal pointers */ 1940 a->idiagvalid = PETSC_FALSE; 1941 if (fshift && a->diag) { 1942 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1943 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1944 a->diag = 0; 1945 } 1946 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1947 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1948 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1949 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1950 1951 A->info.mallocs += a->reallocs; 1952 a->reallocs = 0; 1953 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1954 a->rmax = rmax; 1955 1956 if (!A->structure_only) { 1957 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1958 } 1959 PetscFunctionReturn(0); 1960 } 1961 1962 /* 1963 This function returns an array of flags which indicate the locations of contiguous 1964 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1965 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1966 Assume: sizes should be long enough to hold all the values. 1967 */ 1968 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1969 { 1970 PetscInt i,j,k,row; 1971 PetscBool flg; 1972 1973 PetscFunctionBegin; 1974 for (i=0,j=0; i<n; j++) { 1975 row = idx[i]; 1976 if (row%bs!=0) { /* Not the begining of a block */ 1977 sizes[j] = 1; 1978 i++; 1979 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1980 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1981 i++; 1982 } else { /* Begining of the block, so check if the complete block exists */ 1983 flg = PETSC_TRUE; 1984 for (k=1; k<bs; k++) { 1985 if (row+k != idx[i+k]) { /* break in the block */ 1986 flg = PETSC_FALSE; 1987 break; 1988 } 1989 } 1990 if (flg) { /* No break in the bs */ 1991 sizes[j] = bs; 1992 i += bs; 1993 } else { 1994 sizes[j] = 1; 1995 i++; 1996 } 1997 } 1998 } 1999 *bs_max = j; 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2004 { 2005 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2006 PetscErrorCode ierr; 2007 PetscInt i,j,k,count,*rows; 2008 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2009 PetscScalar zero = 0.0; 2010 MatScalar *aa; 2011 const PetscScalar *xx; 2012 PetscScalar *bb; 2013 2014 PetscFunctionBegin; 2015 /* fix right hand side if needed */ 2016 if (x && b) { 2017 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2018 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2019 for (i=0; i<is_n; i++) { 2020 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2021 } 2022 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2023 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2024 } 2025 2026 /* Make a copy of the IS and sort it */ 2027 /* allocate memory for rows,sizes */ 2028 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2029 2030 /* copy IS values to rows, and sort them */ 2031 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2032 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2033 2034 if (baij->keepnonzeropattern) { 2035 for (i=0; i<is_n; i++) sizes[i] = 1; 2036 bs_max = is_n; 2037 } else { 2038 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2039 A->nonzerostate++; 2040 } 2041 2042 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2043 row = rows[j]; 2044 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2045 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2046 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2047 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2048 if (diag != (PetscScalar)0.0) { 2049 if (baij->ilen[row/bs] > 0) { 2050 baij->ilen[row/bs] = 1; 2051 baij->j[baij->i[row/bs]] = row/bs; 2052 2053 ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr); 2054 } 2055 /* Now insert all the diagonal values for this bs */ 2056 for (k=0; k<bs; k++) { 2057 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2058 } 2059 } else { /* (diag == 0.0) */ 2060 baij->ilen[row/bs] = 0; 2061 } /* end (diag == 0.0) */ 2062 } else { /* (sizes[i] != bs) */ 2063 #if defined(PETSC_USE_DEBUG) 2064 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2065 #endif 2066 for (k=0; k<count; k++) { 2067 aa[0] = zero; 2068 aa += bs; 2069 } 2070 if (diag != (PetscScalar)0.0) { 2071 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2072 } 2073 } 2074 } 2075 2076 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2077 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 PetscFunctionReturn(0); 2079 } 2080 2081 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2082 { 2083 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2084 PetscErrorCode ierr; 2085 PetscInt i,j,k,count; 2086 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2087 PetscScalar zero = 0.0; 2088 MatScalar *aa; 2089 const PetscScalar *xx; 2090 PetscScalar *bb; 2091 PetscBool *zeroed,vecs = PETSC_FALSE; 2092 2093 PetscFunctionBegin; 2094 /* fix right hand side if needed */ 2095 if (x && b) { 2096 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2097 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2098 vecs = PETSC_TRUE; 2099 } 2100 2101 /* zero the columns */ 2102 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2103 for (i=0; i<is_n; i++) { 2104 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2105 zeroed[is_idx[i]] = PETSC_TRUE; 2106 } 2107 for (i=0; i<A->rmap->N; i++) { 2108 if (!zeroed[i]) { 2109 row = i/bs; 2110 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2111 for (k=0; k<bs; k++) { 2112 col = bs*baij->j[j] + k; 2113 if (zeroed[col]) { 2114 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2115 if (vecs) bb[i] -= aa[0]*xx[col]; 2116 aa[0] = 0.0; 2117 } 2118 } 2119 } 2120 } else if (vecs) bb[i] = diag*xx[i]; 2121 } 2122 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2123 if (vecs) { 2124 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2125 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2126 } 2127 2128 /* zero the rows */ 2129 for (i=0; i<is_n; i++) { 2130 row = is_idx[i]; 2131 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2132 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2133 for (k=0; k<count; k++) { 2134 aa[0] = zero; 2135 aa += bs; 2136 } 2137 if (diag != (PetscScalar)0.0) { 2138 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2139 } 2140 } 2141 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2146 { 2147 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2148 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2149 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2150 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2151 PetscErrorCode ierr; 2152 PetscInt ridx,cidx,bs2=a->bs2; 2153 PetscBool roworiented=a->roworiented; 2154 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2155 2156 PetscFunctionBegin; 2157 for (k=0; k<m; k++) { /* loop over added rows */ 2158 row = im[k]; 2159 brow = row/bs; 2160 if (row < 0) continue; 2161 #if defined(PETSC_USE_DEBUG) 2162 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2163 #endif 2164 rp = aj + ai[brow]; 2165 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2166 rmax = imax[brow]; 2167 nrow = ailen[brow]; 2168 low = 0; 2169 high = nrow; 2170 for (l=0; l<n; l++) { /* loop over added columns */ 2171 if (in[l] < 0) continue; 2172 #if defined(PETSC_USE_DEBUG) 2173 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2174 #endif 2175 col = in[l]; bcol = col/bs; 2176 ridx = row % bs; cidx = col % bs; 2177 if (!A->structure_only) { 2178 if (roworiented) { 2179 value = v[l + k*n]; 2180 } else { 2181 value = v[k + l*m]; 2182 } 2183 } 2184 if (col <= lastcol) low = 0; else high = nrow; 2185 lastcol = col; 2186 while (high-low > 7) { 2187 t = (low+high)/2; 2188 if (rp[t] > bcol) high = t; 2189 else low = t; 2190 } 2191 for (i=low; i<high; i++) { 2192 if (rp[i] > bcol) break; 2193 if (rp[i] == bcol) { 2194 bap = ap + bs2*i + bs*cidx + ridx; 2195 if (!A->structure_only) { 2196 if (is == ADD_VALUES) *bap += value; 2197 else *bap = value; 2198 } 2199 goto noinsert1; 2200 } 2201 } 2202 if (nonew == 1) goto noinsert1; 2203 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2204 if (A->structure_only) { 2205 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2206 } else { 2207 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2208 } 2209 N = nrow++ - 1; high++; 2210 /* shift up all the later entries in this row */ 2211 ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 2212 rp[i] = bcol; 2213 if (!A->structure_only) { 2214 ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 2215 ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr); 2216 ap[bs2*i + bs*cidx + ridx] = value; 2217 } 2218 a->nz++; 2219 A->nonzerostate++; 2220 noinsert1:; 2221 low = i; 2222 } 2223 ailen[brow] = nrow; 2224 } 2225 PetscFunctionReturn(0); 2226 } 2227 2228 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2229 { 2230 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2231 Mat outA; 2232 PetscErrorCode ierr; 2233 PetscBool row_identity,col_identity; 2234 2235 PetscFunctionBegin; 2236 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2237 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2238 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2239 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2240 2241 outA = inA; 2242 inA->factortype = MAT_FACTOR_LU; 2243 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2244 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2245 2246 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2247 2248 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2249 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2250 a->row = row; 2251 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2252 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2253 a->col = col; 2254 2255 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2256 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2257 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2258 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2259 2260 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2261 if (!a->solve_work) { 2262 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2263 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2264 } 2265 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2270 { 2271 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2272 PetscInt i,nz,mbs; 2273 2274 PetscFunctionBegin; 2275 nz = baij->maxnz; 2276 mbs = baij->mbs; 2277 for (i=0; i<nz; i++) { 2278 baij->j[i] = indices[i]; 2279 } 2280 baij->nz = nz; 2281 for (i=0; i<mbs; i++) { 2282 baij->ilen[i] = baij->imax[i]; 2283 } 2284 PetscFunctionReturn(0); 2285 } 2286 2287 /*@ 2288 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2289 in the matrix. 2290 2291 Input Parameters: 2292 + mat - the SeqBAIJ matrix 2293 - indices - the column indices 2294 2295 Level: advanced 2296 2297 Notes: 2298 This can be called if you have precomputed the nonzero structure of the 2299 matrix and want to provide it to the matrix object to improve the performance 2300 of the MatSetValues() operation. 2301 2302 You MUST have set the correct numbers of nonzeros per row in the call to 2303 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2304 2305 MUST be called before any calls to MatSetValues(); 2306 2307 @*/ 2308 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2309 { 2310 PetscErrorCode ierr; 2311 2312 PetscFunctionBegin; 2313 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2314 PetscValidPointer(indices,2); 2315 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2320 { 2321 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2322 PetscErrorCode ierr; 2323 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2324 PetscReal atmp; 2325 PetscScalar *x,zero = 0.0; 2326 MatScalar *aa; 2327 PetscInt ncols,brow,krow,kcol; 2328 2329 PetscFunctionBegin; 2330 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2331 bs = A->rmap->bs; 2332 aa = a->a; 2333 ai = a->i; 2334 aj = a->j; 2335 mbs = a->mbs; 2336 2337 ierr = VecSet(v,zero);CHKERRQ(ierr); 2338 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2339 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2340 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2341 for (i=0; i<mbs; i++) { 2342 ncols = ai[1] - ai[0]; ai++; 2343 brow = bs*i; 2344 for (j=0; j<ncols; j++) { 2345 for (kcol=0; kcol<bs; kcol++) { 2346 for (krow=0; krow<bs; krow++) { 2347 atmp = PetscAbsScalar(*aa);aa++; 2348 row = brow + krow; /* row index */ 2349 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2350 } 2351 } 2352 aj++; 2353 } 2354 } 2355 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2356 PetscFunctionReturn(0); 2357 } 2358 2359 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2360 { 2361 PetscErrorCode ierr; 2362 2363 PetscFunctionBegin; 2364 /* If the two matrices have the same copy implementation, use fast copy. */ 2365 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2367 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2368 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2369 2370 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2371 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2372 ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr); 2373 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2374 } else { 2375 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2376 } 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2381 { 2382 PetscErrorCode ierr; 2383 2384 PetscFunctionBegin; 2385 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2390 { 2391 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2392 2393 PetscFunctionBegin; 2394 *array = a->a; 2395 PetscFunctionReturn(0); 2396 } 2397 2398 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2399 { 2400 PetscFunctionBegin; 2401 *array = NULL; 2402 PetscFunctionReturn(0); 2403 } 2404 2405 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2406 { 2407 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2408 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2409 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2410 PetscErrorCode ierr; 2411 2412 PetscFunctionBegin; 2413 /* Set the number of nonzeros in the new matrix */ 2414 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2419 { 2420 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2421 PetscErrorCode ierr; 2422 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2423 PetscBLASInt one=1; 2424 2425 PetscFunctionBegin; 2426 if (str == SAME_NONZERO_PATTERN) { 2427 PetscScalar alpha = a; 2428 PetscBLASInt bnz; 2429 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2430 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2431 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2432 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2433 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2434 } else { 2435 Mat B; 2436 PetscInt *nnz; 2437 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2438 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2439 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2440 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2441 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2442 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2443 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2444 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2445 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2446 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2447 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2448 ierr = PetscFree(nnz);CHKERRQ(ierr); 2449 } 2450 PetscFunctionReturn(0); 2451 } 2452 2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2454 { 2455 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2456 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2457 MatScalar *aa = a->a; 2458 2459 PetscFunctionBegin; 2460 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2461 PetscFunctionReturn(0); 2462 } 2463 2464 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2465 { 2466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2467 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2468 MatScalar *aa = a->a; 2469 2470 PetscFunctionBegin; 2471 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 /* 2476 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2477 */ 2478 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2479 { 2480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2481 PetscErrorCode ierr; 2482 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2483 PetscInt nz = a->i[m],row,*jj,mr,col; 2484 2485 PetscFunctionBegin; 2486 *nn = n; 2487 if (!ia) PetscFunctionReturn(0); 2488 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2489 else { 2490 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2491 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2492 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2493 jj = a->j; 2494 for (i=0; i<nz; i++) { 2495 collengths[jj[i]]++; 2496 } 2497 cia[0] = oshift; 2498 for (i=0; i<n; i++) { 2499 cia[i+1] = cia[i] + collengths[i]; 2500 } 2501 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2502 jj = a->j; 2503 for (row=0; row<m; row++) { 2504 mr = a->i[row+1] - a->i[row]; 2505 for (i=0; i<mr; i++) { 2506 col = *jj++; 2507 2508 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2509 } 2510 } 2511 ierr = PetscFree(collengths);CHKERRQ(ierr); 2512 *ia = cia; *ja = cja; 2513 } 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2518 { 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 if (!ia) PetscFunctionReturn(0); 2523 ierr = PetscFree(*ia);CHKERRQ(ierr); 2524 ierr = PetscFree(*ja);CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 /* 2529 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2530 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2531 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2532 */ 2533 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2534 { 2535 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2536 PetscErrorCode ierr; 2537 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2538 PetscInt nz = a->i[m],row,*jj,mr,col; 2539 PetscInt *cspidx; 2540 2541 PetscFunctionBegin; 2542 *nn = n; 2543 if (!ia) PetscFunctionReturn(0); 2544 2545 ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr); 2546 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2547 ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr); 2549 jj = a->j; 2550 for (i=0; i<nz; i++) { 2551 collengths[jj[i]]++; 2552 } 2553 cia[0] = oshift; 2554 for (i=0; i<n; i++) { 2555 cia[i+1] = cia[i] + collengths[i]; 2556 } 2557 ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr); 2558 jj = a->j; 2559 for (row=0; row<m; row++) { 2560 mr = a->i[row+1] - a->i[row]; 2561 for (i=0; i<mr; i++) { 2562 col = *jj++; 2563 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2564 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2565 } 2566 } 2567 ierr = PetscFree(collengths);CHKERRQ(ierr); 2568 *ia = cia; 2569 *ja = cja; 2570 *spidx = cspidx; 2571 PetscFunctionReturn(0); 2572 } 2573 2574 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2575 { 2576 PetscErrorCode ierr; 2577 2578 PetscFunctionBegin; 2579 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2580 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2581 PetscFunctionReturn(0); 2582 } 2583 2584 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2585 { 2586 PetscErrorCode ierr; 2587 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2588 2589 PetscFunctionBegin; 2590 if (!Y->preallocated || !aij->nz) { 2591 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2592 } 2593 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /* -------------------------------------------------------------------*/ 2598 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2599 MatGetRow_SeqBAIJ, 2600 MatRestoreRow_SeqBAIJ, 2601 MatMult_SeqBAIJ_N, 2602 /* 4*/ MatMultAdd_SeqBAIJ_N, 2603 MatMultTranspose_SeqBAIJ, 2604 MatMultTransposeAdd_SeqBAIJ, 2605 0, 2606 0, 2607 0, 2608 /* 10*/ 0, 2609 MatLUFactor_SeqBAIJ, 2610 0, 2611 0, 2612 MatTranspose_SeqBAIJ, 2613 /* 15*/ MatGetInfo_SeqBAIJ, 2614 MatEqual_SeqBAIJ, 2615 MatGetDiagonal_SeqBAIJ, 2616 MatDiagonalScale_SeqBAIJ, 2617 MatNorm_SeqBAIJ, 2618 /* 20*/ 0, 2619 MatAssemblyEnd_SeqBAIJ, 2620 MatSetOption_SeqBAIJ, 2621 MatZeroEntries_SeqBAIJ, 2622 /* 24*/ MatZeroRows_SeqBAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /* 29*/ MatSetUp_SeqBAIJ, 2628 0, 2629 0, 2630 0, 2631 0, 2632 /* 34*/ MatDuplicate_SeqBAIJ, 2633 0, 2634 0, 2635 MatILUFactor_SeqBAIJ, 2636 0, 2637 /* 39*/ MatAXPY_SeqBAIJ, 2638 MatCreateSubMatrices_SeqBAIJ, 2639 MatIncreaseOverlap_SeqBAIJ, 2640 MatGetValues_SeqBAIJ, 2641 MatCopy_SeqBAIJ, 2642 /* 44*/ 0, 2643 MatScale_SeqBAIJ, 2644 MatShift_SeqBAIJ, 2645 0, 2646 MatZeroRowsColumns_SeqBAIJ, 2647 /* 49*/ 0, 2648 MatGetRowIJ_SeqBAIJ, 2649 MatRestoreRowIJ_SeqBAIJ, 2650 MatGetColumnIJ_SeqBAIJ, 2651 MatRestoreColumnIJ_SeqBAIJ, 2652 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2653 0, 2654 0, 2655 0, 2656 MatSetValuesBlocked_SeqBAIJ, 2657 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2658 MatDestroy_SeqBAIJ, 2659 MatView_SeqBAIJ, 2660 0, 2661 0, 2662 /* 64*/ 0, 2663 0, 2664 0, 2665 0, 2666 0, 2667 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2668 0, 2669 MatConvert_Basic, 2670 0, 2671 0, 2672 /* 74*/ 0, 2673 MatFDColoringApply_BAIJ, 2674 0, 2675 0, 2676 0, 2677 /* 79*/ 0, 2678 0, 2679 0, 2680 0, 2681 MatLoad_SeqBAIJ, 2682 /* 84*/ 0, 2683 0, 2684 0, 2685 0, 2686 0, 2687 /* 89*/ 0, 2688 0, 2689 0, 2690 0, 2691 0, 2692 /* 94*/ 0, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /* 99*/ 0, 2698 0, 2699 0, 2700 0, 2701 0, 2702 /*104*/ 0, 2703 MatRealPart_SeqBAIJ, 2704 MatImaginaryPart_SeqBAIJ, 2705 0, 2706 0, 2707 /*109*/ 0, 2708 0, 2709 0, 2710 0, 2711 MatMissingDiagonal_SeqBAIJ, 2712 /*114*/ 0, 2713 0, 2714 0, 2715 0, 2716 0, 2717 /*119*/ 0, 2718 0, 2719 MatMultHermitianTranspose_SeqBAIJ, 2720 MatMultHermitianTransposeAdd_SeqBAIJ, 2721 0, 2722 /*124*/ 0, 2723 0, 2724 MatInvertBlockDiagonal_SeqBAIJ, 2725 0, 2726 0, 2727 /*129*/ 0, 2728 0, 2729 0, 2730 0, 2731 0, 2732 /*134*/ 0, 2733 0, 2734 0, 2735 0, 2736 0, 2737 /*139*/ MatSetBlockSizes_Default, 2738 0, 2739 0, 2740 MatFDColoringSetUp_SeqXAIJ, 2741 0, 2742 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2743 MatDestroySubMatrices_SeqBAIJ 2744 }; 2745 2746 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2747 { 2748 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2749 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2750 PetscErrorCode ierr; 2751 2752 PetscFunctionBegin; 2753 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2754 2755 /* allocate space for values if not already there */ 2756 if (!aij->saved_values) { 2757 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2758 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2759 } 2760 2761 /* copy values over */ 2762 ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr); 2763 PetscFunctionReturn(0); 2764 } 2765 2766 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2767 { 2768 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2769 PetscErrorCode ierr; 2770 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2771 2772 PetscFunctionBegin; 2773 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2774 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2775 2776 /* copy values over */ 2777 ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr); 2778 PetscFunctionReturn(0); 2779 } 2780 2781 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2783 2784 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2785 { 2786 Mat_SeqBAIJ *b; 2787 PetscErrorCode ierr; 2788 PetscInt i,mbs,nbs,bs2; 2789 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2790 2791 PetscFunctionBegin; 2792 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2793 if (nz == MAT_SKIP_ALLOCATION) { 2794 skipallocation = PETSC_TRUE; 2795 nz = 0; 2796 } 2797 2798 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2799 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2800 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2801 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2802 2803 B->preallocated = PETSC_TRUE; 2804 2805 mbs = B->rmap->n/bs; 2806 nbs = B->cmap->n/bs; 2807 bs2 = bs*bs; 2808 2809 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2810 2811 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2812 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2813 if (nnz) { 2814 for (i=0; i<mbs; i++) { 2815 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2816 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2817 } 2818 } 2819 2820 b = (Mat_SeqBAIJ*)B->data; 2821 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2822 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2823 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2824 2825 if (!flg) { 2826 switch (bs) { 2827 case 1: 2828 B->ops->mult = MatMult_SeqBAIJ_1; 2829 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2830 break; 2831 case 2: 2832 B->ops->mult = MatMult_SeqBAIJ_2; 2833 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2834 break; 2835 case 3: 2836 B->ops->mult = MatMult_SeqBAIJ_3; 2837 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2838 break; 2839 case 4: 2840 B->ops->mult = MatMult_SeqBAIJ_4; 2841 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2842 break; 2843 case 5: 2844 B->ops->mult = MatMult_SeqBAIJ_5; 2845 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2846 break; 2847 case 6: 2848 B->ops->mult = MatMult_SeqBAIJ_6; 2849 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2850 break; 2851 case 7: 2852 B->ops->mult = MatMult_SeqBAIJ_7; 2853 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2854 break; 2855 case 9: 2856 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2857 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2858 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2859 #else 2860 B->ops->mult = MatMult_SeqBAIJ_N; 2861 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2862 #endif 2863 break; 2864 case 11: 2865 B->ops->mult = MatMult_SeqBAIJ_11; 2866 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2867 break; 2868 case 15: 2869 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2870 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2871 break; 2872 default: 2873 B->ops->mult = MatMult_SeqBAIJ_N; 2874 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2875 break; 2876 } 2877 } 2878 B->ops->sor = MatSOR_SeqBAIJ; 2879 b->mbs = mbs; 2880 b->nbs = nbs; 2881 if (!skipallocation) { 2882 if (!b->imax) { 2883 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2884 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2885 2886 b->free_imax_ilen = PETSC_TRUE; 2887 } 2888 /* b->ilen will count nonzeros in each block row so far. */ 2889 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2890 if (!nnz) { 2891 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2892 else if (nz < 0) nz = 1; 2893 nz = PetscMin(nz,nbs); 2894 for (i=0; i<mbs; i++) b->imax[i] = nz; 2895 nz = nz*mbs; 2896 } else { 2897 PetscInt64 nz64 = 0; 2898 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 2899 ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr); 2900 } 2901 2902 /* allocate the matrix space */ 2903 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2904 if (B->structure_only) { 2905 ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr); 2906 ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr); 2907 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr); 2908 } else { 2909 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2910 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2911 ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr); 2912 } 2913 ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr); 2914 2915 if (B->structure_only) { 2916 b->singlemalloc = PETSC_FALSE; 2917 b->free_a = PETSC_FALSE; 2918 } else { 2919 b->singlemalloc = PETSC_TRUE; 2920 b->free_a = PETSC_TRUE; 2921 } 2922 b->free_ij = PETSC_TRUE; 2923 2924 b->i[0] = 0; 2925 for (i=1; i<mbs+1; i++) { 2926 b->i[i] = b->i[i-1] + b->imax[i-1]; 2927 } 2928 2929 } else { 2930 b->free_a = PETSC_FALSE; 2931 b->free_ij = PETSC_FALSE; 2932 } 2933 2934 b->bs2 = bs2; 2935 b->mbs = mbs; 2936 b->nz = 0; 2937 b->maxnz = nz; 2938 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2939 B->was_assembled = PETSC_FALSE; 2940 B->assembled = PETSC_FALSE; 2941 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2942 PetscFunctionReturn(0); 2943 } 2944 2945 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2946 { 2947 PetscInt i,m,nz,nz_max=0,*nnz; 2948 PetscScalar *values=0; 2949 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2950 PetscErrorCode ierr; 2951 2952 PetscFunctionBegin; 2953 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2954 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2956 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2959 m = B->rmap->n/bs; 2960 2961 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2962 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2963 for (i=0; i<m; i++) { 2964 nz = ii[i+1]- ii[i]; 2965 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2966 nz_max = PetscMax(nz_max, nz); 2967 nnz[i] = nz; 2968 } 2969 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2970 ierr = PetscFree(nnz);CHKERRQ(ierr); 2971 2972 values = (PetscScalar*)V; 2973 if (!values) { 2974 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2975 } 2976 for (i=0; i<m; i++) { 2977 PetscInt ncols = ii[i+1] - ii[i]; 2978 const PetscInt *icols = jj + ii[i]; 2979 if (bs == 1 || !roworiented) { 2980 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2981 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2982 } else { 2983 PetscInt j; 2984 for (j=0; j<ncols; j++) { 2985 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2986 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2987 } 2988 } 2989 } 2990 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2991 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2992 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2993 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2994 PetscFunctionReturn(0); 2995 } 2996 2997 /*@C 2998 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 2999 3000 Not Collective 3001 3002 Input Parameter: 3003 . mat - a MATSEQBAIJ matrix 3004 3005 Output Parameter: 3006 . array - pointer to the data 3007 3008 Level: intermediate 3009 3010 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3011 @*/ 3012 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3013 { 3014 PetscErrorCode ierr; 3015 3016 PetscFunctionBegin; 3017 ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3018 PetscFunctionReturn(0); 3019 } 3020 3021 /*@C 3022 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3023 3024 Not Collective 3025 3026 Input Parameters: 3027 + mat - a MATSEQBAIJ matrix 3028 - array - pointer to the data 3029 3030 Level: intermediate 3031 3032 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray() 3033 @*/ 3034 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3035 { 3036 PetscErrorCode ierr; 3037 3038 PetscFunctionBegin; 3039 ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr); 3040 PetscFunctionReturn(0); 3041 } 3042 3043 /*MC 3044 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3045 block sparse compressed row format. 3046 3047 Options Database Keys: 3048 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3049 3050 Level: beginner 3051 3052 Notes: 3053 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3054 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3055 3056 .seealso: MatCreateSeqBAIJ() 3057 M*/ 3058 3059 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3060 3061 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3062 { 3063 PetscErrorCode ierr; 3064 PetscMPIInt size; 3065 Mat_SeqBAIJ *b; 3066 3067 PetscFunctionBegin; 3068 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3069 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3070 3071 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3072 B->data = (void*)b; 3073 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3074 3075 b->row = 0; 3076 b->col = 0; 3077 b->icol = 0; 3078 b->reallocs = 0; 3079 b->saved_values = 0; 3080 3081 b->roworiented = PETSC_TRUE; 3082 b->nonew = 0; 3083 b->diag = 0; 3084 B->spptr = 0; 3085 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3086 b->keepnonzeropattern = PETSC_FALSE; 3087 3088 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr); 3089 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr); 3090 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3091 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3092 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3093 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3094 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3095 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3096 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3097 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3098 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3099 #if defined(PETSC_HAVE_HYPRE) 3100 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3101 #endif 3102 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 3103 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 3104 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3105 PetscFunctionReturn(0); 3106 } 3107 3108 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3109 { 3110 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3111 PetscErrorCode ierr; 3112 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3113 3114 PetscFunctionBegin; 3115 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3116 3117 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3118 c->imax = a->imax; 3119 c->ilen = a->ilen; 3120 c->free_imax_ilen = PETSC_FALSE; 3121 } else { 3122 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3123 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3124 for (i=0; i<mbs; i++) { 3125 c->imax[i] = a->imax[i]; 3126 c->ilen[i] = a->ilen[i]; 3127 } 3128 c->free_imax_ilen = PETSC_TRUE; 3129 } 3130 3131 /* allocate the matrix space */ 3132 if (mallocmatspace) { 3133 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3134 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3135 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3136 3137 c->i = a->i; 3138 c->j = a->j; 3139 c->singlemalloc = PETSC_FALSE; 3140 c->free_a = PETSC_TRUE; 3141 c->free_ij = PETSC_FALSE; 3142 c->parent = A; 3143 C->preallocated = PETSC_TRUE; 3144 C->assembled = PETSC_TRUE; 3145 3146 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3147 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3148 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3149 } else { 3150 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3151 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3152 3153 c->singlemalloc = PETSC_TRUE; 3154 c->free_a = PETSC_TRUE; 3155 c->free_ij = PETSC_TRUE; 3156 3157 ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr); 3158 if (mbs > 0) { 3159 ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr); 3160 if (cpvalues == MAT_COPY_VALUES) { 3161 ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr); 3162 } else { 3163 ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr); 3164 } 3165 } 3166 C->preallocated = PETSC_TRUE; 3167 C->assembled = PETSC_TRUE; 3168 } 3169 } 3170 3171 c->roworiented = a->roworiented; 3172 c->nonew = a->nonew; 3173 3174 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3175 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3176 3177 c->bs2 = a->bs2; 3178 c->mbs = a->mbs; 3179 c->nbs = a->nbs; 3180 3181 if (a->diag) { 3182 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3183 c->diag = a->diag; 3184 c->free_diag = PETSC_FALSE; 3185 } else { 3186 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3187 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3188 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3189 c->free_diag = PETSC_TRUE; 3190 } 3191 } else c->diag = 0; 3192 3193 c->nz = a->nz; 3194 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3195 c->solve_work = NULL; 3196 c->mult_work = NULL; 3197 c->sor_workt = NULL; 3198 c->sor_work = NULL; 3199 3200 c->compressedrow.use = a->compressedrow.use; 3201 c->compressedrow.nrows = a->compressedrow.nrows; 3202 if (a->compressedrow.use) { 3203 i = a->compressedrow.nrows; 3204 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3205 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3206 ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr); 3207 ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr); 3208 } else { 3209 c->compressedrow.use = PETSC_FALSE; 3210 c->compressedrow.i = NULL; 3211 c->compressedrow.rindex = NULL; 3212 } 3213 C->nonzerostate = A->nonzerostate; 3214 3215 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3216 PetscFunctionReturn(0); 3217 } 3218 3219 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3220 { 3221 PetscErrorCode ierr; 3222 3223 PetscFunctionBegin; 3224 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3225 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3226 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3227 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3228 PetscFunctionReturn(0); 3229 } 3230 3231 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3232 { 3233 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3234 PetscInt *rowidxs,*colidxs; 3235 PetscScalar *matvals; 3236 PetscErrorCode ierr; 3237 3238 PetscFunctionBegin; 3239 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3240 3241 /* read matrix header */ 3242 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3243 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3244 M = header[1]; N = header[2]; nz = header[3]; 3245 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3246 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 3247 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3248 3249 /* set block sizes from the viewer's .info file */ 3250 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3251 /* set local and global sizes if not set already */ 3252 if (mat->rmap->n < 0) mat->rmap->n = M; 3253 if (mat->cmap->n < 0) mat->cmap->n = N; 3254 if (mat->rmap->N < 0) mat->rmap->N = M; 3255 if (mat->cmap->N < 0) mat->cmap->N = N; 3256 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3257 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3258 3259 /* check if the matrix sizes are correct */ 3260 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3261 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3262 ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3263 ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3264 mbs = m/bs; nbs = n/bs; 3265 3266 /* read in row lengths, column indices and nonzero values */ 3267 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3268 ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr); 3269 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3270 sum = rowidxs[m]; 3271 if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3272 3273 /* read in column indices and nonzero values */ 3274 ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr); 3275 ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr); 3276 ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr); 3277 3278 { /* preallocate matrix storage */ 3279 PetscBT bt; /* helper bit set to count nonzeros */ 3280 PetscInt *nnz; 3281 3282 ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3283 ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr); 3284 for (i=0; i<mbs; i++) { 3285 ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3286 for (k=0; k<bs; k++) 3287 for (j=rowidxs[bs*i+k]; j<rowidxs[bs*i+k+1]; j++) 3288 if (!PetscBTLookupSet(bt,colidxs[j]/bs)) nnz[i]++; 3289 } 3290 ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3291 ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr); 3292 ierr = PetscFree(nnz);CHKERRQ(ierr); 3293 } 3294 3295 /* store matrix values */ 3296 for (i=0; i<m; i++) { 3297 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3298 ierr = MatSetValues_SeqBAIJ(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3299 } 3300 3301 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3302 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3303 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3304 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 3308 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3309 { 3310 PetscErrorCode ierr; 3311 PetscBool isbinary; 3312 3313 PetscFunctionBegin; 3314 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3315 if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3316 ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 3317 PetscFunctionReturn(0); 3318 } 3319 3320 /*@C 3321 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3322 compressed row) format. For good matrix assembly performance the 3323 user should preallocate the matrix storage by setting the parameter nz 3324 (or the array nnz). By setting these parameters accurately, performance 3325 during matrix assembly can be increased by more than a factor of 50. 3326 3327 Collective 3328 3329 Input Parameters: 3330 + comm - MPI communicator, set to PETSC_COMM_SELF 3331 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3332 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3333 . m - number of rows 3334 . n - number of columns 3335 . nz - number of nonzero blocks per block row (same for all rows) 3336 - nnz - array containing the number of nonzero blocks in the various block rows 3337 (possibly different for each block row) or NULL 3338 3339 Output Parameter: 3340 . A - the matrix 3341 3342 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3343 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3344 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3345 3346 Options Database Keys: 3347 + -mat_no_unroll - uses code that does not unroll the loops in the 3348 block calculations (much slower) 3349 - -mat_block_size - size of the blocks to use 3350 3351 Level: intermediate 3352 3353 Notes: 3354 The number of rows and columns must be divisible by blocksize. 3355 3356 If the nnz parameter is given then the nz parameter is ignored 3357 3358 A nonzero block is any block that as 1 or more nonzeros in it 3359 3360 The block AIJ format is fully compatible with standard Fortran 77 3361 storage. That is, the stored row and column indices can begin at 3362 either one (as in Fortran) or zero. See the users' manual for details. 3363 3364 Specify the preallocated storage with either nz or nnz (not both). 3365 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3366 allocation. See Users-Manual: ch_mat for details. 3367 matrices. 3368 3369 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3370 @*/ 3371 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3372 { 3373 PetscErrorCode ierr; 3374 3375 PetscFunctionBegin; 3376 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3377 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3378 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3379 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3380 PetscFunctionReturn(0); 3381 } 3382 3383 /*@C 3384 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3385 per row in the matrix. For good matrix assembly performance the 3386 user should preallocate the matrix storage by setting the parameter nz 3387 (or the array nnz). By setting these parameters accurately, performance 3388 during matrix assembly can be increased by more than a factor of 50. 3389 3390 Collective 3391 3392 Input Parameters: 3393 + B - the matrix 3394 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3395 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3396 . nz - number of block nonzeros per block row (same for all rows) 3397 - nnz - array containing the number of block nonzeros in the various block rows 3398 (possibly different for each block row) or NULL 3399 3400 Options Database Keys: 3401 + -mat_no_unroll - uses code that does not unroll the loops in the 3402 block calculations (much slower) 3403 - -mat_block_size - size of the blocks to use 3404 3405 Level: intermediate 3406 3407 Notes: 3408 If the nnz parameter is given then the nz parameter is ignored 3409 3410 You can call MatGetInfo() to get information on how effective the preallocation was; 3411 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3412 You can also run with the option -info and look for messages with the string 3413 malloc in them to see if additional memory allocation was needed. 3414 3415 The block AIJ format is fully compatible with standard Fortran 77 3416 storage. That is, the stored row and column indices can begin at 3417 either one (as in Fortran) or zero. See the users' manual for details. 3418 3419 Specify the preallocated storage with either nz or nnz (not both). 3420 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3421 allocation. See Users-Manual: ch_mat for details. 3422 3423 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3424 @*/ 3425 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3426 { 3427 PetscErrorCode ierr; 3428 3429 PetscFunctionBegin; 3430 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3431 PetscValidType(B,1); 3432 PetscValidLogicalCollectiveInt(B,bs,2); 3433 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3434 PetscFunctionReturn(0); 3435 } 3436 3437 /*@C 3438 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3439 3440 Collective 3441 3442 Input Parameters: 3443 + B - the matrix 3444 . i - the indices into j for the start of each local row (starts with zero) 3445 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3446 - v - optional values in the matrix 3447 3448 Level: advanced 3449 3450 Notes: 3451 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3452 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3453 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3454 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3455 block column and the second index is over columns within a block. 3456 3457 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3458 3459 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3460 @*/ 3461 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3462 { 3463 PetscErrorCode ierr; 3464 3465 PetscFunctionBegin; 3466 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3467 PetscValidType(B,1); 3468 PetscValidLogicalCollectiveInt(B,bs,2); 3469 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3470 PetscFunctionReturn(0); 3471 } 3472 3473 3474 /*@ 3475 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3476 3477 Collective 3478 3479 Input Parameters: 3480 + comm - must be an MPI communicator of size 1 3481 . bs - size of block 3482 . m - number of rows 3483 . n - number of columns 3484 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3485 . j - column indices 3486 - a - matrix values 3487 3488 Output Parameter: 3489 . mat - the matrix 3490 3491 Level: advanced 3492 3493 Notes: 3494 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3495 once the matrix is destroyed 3496 3497 You cannot set new nonzero locations into this matrix, that will generate an error. 3498 3499 The i and j indices are 0 based 3500 3501 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3502 3503 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3504 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3505 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3506 with column-major ordering within blocks. 3507 3508 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3509 3510 @*/ 3511 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3512 { 3513 PetscErrorCode ierr; 3514 PetscInt ii; 3515 Mat_SeqBAIJ *baij; 3516 3517 PetscFunctionBegin; 3518 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3519 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3520 3521 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3522 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3523 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3524 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3525 baij = (Mat_SeqBAIJ*)(*mat)->data; 3526 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3527 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3528 3529 baij->i = i; 3530 baij->j = j; 3531 baij->a = a; 3532 3533 baij->singlemalloc = PETSC_FALSE; 3534 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3535 baij->free_a = PETSC_FALSE; 3536 baij->free_ij = PETSC_FALSE; 3537 3538 for (ii=0; ii<m; ii++) { 3539 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3540 #if defined(PETSC_USE_DEBUG) 3541 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3542 #endif 3543 } 3544 #if defined(PETSC_USE_DEBUG) 3545 for (ii=0; ii<baij->i[m]; ii++) { 3546 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3547 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3548 } 3549 #endif 3550 3551 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3552 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3553 PetscFunctionReturn(0); 3554 } 3555 3556 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3557 { 3558 PetscErrorCode ierr; 3559 PetscMPIInt size; 3560 3561 PetscFunctionBegin; 3562 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3563 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3564 ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3565 } else { 3566 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3567 } 3568 PetscFunctionReturn(0); 3569 } 3570