1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A,PetscInt type,PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) A->data; 23 PetscInt m,n,i; 24 PetscInt ib,jb,bs = A->rmap->bs; 25 MatScalar *a_val = a_aij->a; 26 27 PetscFunctionBegin; 28 PetscCall(MatGetSize(A,&m,&n)); 29 for (i=0; i<n; i++) reductions[i] = 0.0; 30 if (type == NORM_2) { 31 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 32 for (jb=0; jb<bs; jb++) { 33 for (ib=0; ib<bs; ib++) { 34 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 35 a_val++; 36 } 37 } 38 } 39 } else if (type == NORM_1) { 40 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 41 for (jb=0; jb<bs; jb++) { 42 for (ib=0; ib<bs; ib++) { 43 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 44 a_val++; 45 } 46 } 47 } 48 } else if (type == NORM_INFINITY) { 49 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 50 for (jb=0; jb<bs; jb++) { 51 for (ib=0; ib<bs; ib++) { 52 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 53 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 54 a_val++; 55 } 56 } 57 } 58 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 59 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 60 for (jb=0; jb<bs; jb++) { 61 for (ib=0; ib<bs; ib++) { 62 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 63 a_val++; 64 } 65 } 66 } 67 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 68 for (i=a_aij->i[0]; i<a_aij->i[A->rmap->n/bs]; i++) { 69 for (jb=0; jb<bs; jb++) { 70 for (ib=0; ib<bs; ib++) { 71 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 72 a_val++; 73 } 74 } 75 } 76 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 77 if (type == NORM_2) { 78 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 79 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (i=0; i<n; i++) reductions[i] /= m; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 88 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 89 MatScalar *v = a->a,*odiag,*diag,work[25],*v_work; 90 PetscReal shift = 0.0; 91 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 92 93 PetscFunctionBegin; 94 allowzeropivot = PetscNot(A->erroriffailure); 95 96 if (a->idiagvalid) { 97 if (values) *values = a->idiag; 98 PetscFunctionReturn(0); 99 } 100 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 101 diag_offset = a->diag; 102 if (!a->idiag) { 103 PetscCall(PetscMalloc1(bs2*mbs,&a->idiag)); 104 PetscCall(PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar))); 105 } 106 diag = a->idiag; 107 if (values) *values = a->idiag; 108 /* factor and invert each block */ 109 switch (bs) { 110 case 1: 111 for (i=0; i<mbs; i++) { 112 odiag = v + 1*diag_offset[i]; 113 diag[0] = odiag[0]; 114 115 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 116 if (allowzeropivot) { 117 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 118 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 119 A->factorerror_zeropivot_row = i; 120 PetscCall(PetscInfo(A,"Zero pivot, row %" PetscInt_FMT "\n",i)); 121 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 122 } 123 124 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 125 diag += 1; 126 } 127 break; 128 case 2: 129 for (i=0; i<mbs; i++) { 130 odiag = v + 4*diag_offset[i]; 131 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 132 PetscCall(PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected)); 133 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 134 diag += 4; 135 } 136 break; 137 case 3: 138 for (i=0; i<mbs; i++) { 139 odiag = v + 9*diag_offset[i]; 140 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 141 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 142 diag[8] = odiag[8]; 143 PetscCall(PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected)); 144 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 145 diag += 9; 146 } 147 break; 148 case 4: 149 for (i=0; i<mbs; i++) { 150 odiag = v + 16*diag_offset[i]; 151 PetscCall(PetscArraycpy(diag,odiag,16)); 152 PetscCall(PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected)); 153 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 154 diag += 16; 155 } 156 break; 157 case 5: 158 for (i=0; i<mbs; i++) { 159 odiag = v + 25*diag_offset[i]; 160 PetscCall(PetscArraycpy(diag,odiag,25)); 161 PetscCall(PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 25; 164 } 165 break; 166 case 6: 167 for (i=0; i<mbs; i++) { 168 odiag = v + 36*diag_offset[i]; 169 PetscCall(PetscArraycpy(diag,odiag,36)); 170 PetscCall(PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 36; 173 } 174 break; 175 case 7: 176 for (i=0; i<mbs; i++) { 177 odiag = v + 49*diag_offset[i]; 178 PetscCall(PetscArraycpy(diag,odiag,49)); 179 PetscCall(PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 49; 182 } 183 break; 184 default: 185 PetscCall(PetscMalloc2(bs,&v_work,bs,&v_pivots)); 186 for (i=0; i<mbs; i++) { 187 odiag = v + bs2*diag_offset[i]; 188 PetscCall(PetscArraycpy(diag,odiag,bs2)); 189 PetscCall(PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected)); 190 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 191 diag += bs2; 192 } 193 PetscCall(PetscFree2(v_work,v_pivots)); 194 } 195 a->idiagvalid = PETSC_TRUE; 196 PetscFunctionReturn(0); 197 } 198 199 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 200 { 201 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 202 PetscScalar *x,*work,*w,*workt,*t; 203 const MatScalar *v,*aa = a->a, *idiag; 204 const PetscScalar *b,*xb; 205 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 206 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 207 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 208 209 PetscFunctionBegin; 210 its = its*lits; 211 PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 212 PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 213 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 214 PetscCheck(omega == 1.0,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor"); 215 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 216 217 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A,NULL)); 218 219 if (!m) PetscFunctionReturn(0); 220 diag = a->diag; 221 idiag = a->idiag; 222 k = PetscMax(A->rmap->n,A->cmap->n); 223 if (!a->mult_work) { 224 PetscCall(PetscMalloc1(k+1,&a->mult_work)); 225 } 226 if (!a->sor_workt) { 227 PetscCall(PetscMalloc1(k,&a->sor_workt)); 228 } 229 if (!a->sor_work) { 230 PetscCall(PetscMalloc1(bs,&a->sor_work)); 231 } 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx,&x)); 237 PetscCall(VecGetArrayRead(bb,&b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i=1; i<m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j=0; j<nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 265 t[0] = b[0]; t[1] = b[1]; 266 i2 = 2; 267 idiag += 4; 268 for (i=1; i<m; i++) { 269 v = aa + 4*ai[i]; 270 vi = aj + ai[i]; 271 nz = diag[i] - ai[i]; 272 s[0] = b[i2]; s[1] = b[i2+1]; 273 for (j=0; j<nz; j++) { 274 idx = 2*vi[j]; 275 it = 4*j; 276 xw[0] = x[idx]; xw[1] = x[1+idx]; 277 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 278 } 279 t[i2] = s[0]; t[i2+1] = s[1]; 280 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 281 x[i2] = xw[0]; x[i2+1] = xw[1]; 282 idiag += 4; 283 i2 += 2; 284 } 285 break; 286 case 3: 287 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 288 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 289 i2 = 3; 290 idiag += 9; 291 for (i=1; i<m; i++) { 292 v = aa + 9*ai[i]; 293 vi = aj + ai[i]; 294 nz = diag[i] - ai[i]; 295 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 296 while (nz--) { 297 idx = 3*(*vi++); 298 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 299 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 300 v += 9; 301 } 302 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 303 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 304 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 305 idiag += 9; 306 i2 += 3; 307 } 308 break; 309 case 4: 310 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 311 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 312 i2 = 4; 313 idiag += 16; 314 for (i=1; i<m; i++) { 315 v = aa + 16*ai[i]; 316 vi = aj + ai[i]; 317 nz = diag[i] - ai[i]; 318 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 319 while (nz--) { 320 idx = 4*(*vi++); 321 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 323 v += 16; 324 } 325 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 326 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 327 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 328 idiag += 16; 329 i2 += 4; 330 } 331 break; 332 case 5: 333 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 334 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 335 i2 = 5; 336 idiag += 25; 337 for (i=1; i<m; i++) { 338 v = aa + 25*ai[i]; 339 vi = aj + ai[i]; 340 nz = diag[i] - ai[i]; 341 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 342 while (nz--) { 343 idx = 5*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 345 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 346 v += 25; 347 } 348 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 349 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 350 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 351 idiag += 25; 352 i2 += 5; 353 } 354 break; 355 case 6: 356 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 357 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 358 i2 = 6; 359 idiag += 36; 360 for (i=1; i<m; i++) { 361 v = aa + 36*ai[i]; 362 vi = aj + ai[i]; 363 nz = diag[i] - ai[i]; 364 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 365 while (nz--) { 366 idx = 6*(*vi++); 367 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 368 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 369 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 370 v += 36; 371 } 372 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 373 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 374 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 375 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 376 idiag += 36; 377 i2 += 6; 378 } 379 break; 380 case 7: 381 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 382 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 383 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 384 i2 = 7; 385 idiag += 49; 386 for (i=1; i<m; i++) { 387 v = aa + 49*ai[i]; 388 vi = aj + ai[i]; 389 nz = diag[i] - ai[i]; 390 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 391 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 392 while (nz--) { 393 idx = 7*(*vi++); 394 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 395 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 396 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 397 v += 49; 398 } 399 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 400 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 401 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 402 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 403 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 404 idiag += 49; 405 i2 += 7; 406 } 407 break; 408 default: 409 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 410 PetscCall(PetscArraycpy(t,b,bs)); 411 i2 = bs; 412 idiag += bs2; 413 for (i=1; i<m; i++) { 414 v = aa + bs2*ai[i]; 415 vi = aj + ai[i]; 416 nz = diag[i] - ai[i]; 417 418 PetscCall(PetscArraycpy(w,b+i2,bs)); 419 /* copy all rows of x that are needed into contiguous space */ 420 workt = work; 421 for (j=0; j<nz; j++) { 422 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 423 workt += bs; 424 } 425 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 426 PetscCall(PetscArraycpy(t+i2,w,bs)); 427 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 428 429 idiag += bs2; 430 i2 += bs; 431 } 432 break; 433 } 434 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 435 PetscCall(PetscLogFlops(1.0*bs2*a->nz)); 436 xb = t; 437 } 438 else xb = b; 439 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 440 idiag = a->idiag+bs2*(a->mbs-1); 441 i2 = bs * (m-1); 442 switch (bs) { 443 case 1: 444 s[0] = xb[i2]; 445 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 446 x[i2] = xw[0]; 447 i2 -= 1; 448 for (i=m-2; i>=0; i--) { 449 v = aa + (diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; 453 for (j=0; j<nz; j++) { 454 xw[0] = x[vi[j]]; 455 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 456 } 457 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 458 x[i2] = xw[0]; 459 idiag -= 1; 460 i2 -= 1; 461 } 462 break; 463 case 2: 464 s[0] = xb[i2]; s[1] = xb[i2+1]; 465 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 466 x[i2] = xw[0]; x[i2+1] = xw[1]; 467 i2 -= 2; 468 idiag -= 4; 469 for (i=m-2; i>=0; i--) { 470 v = aa + 4*(diag[i] + 1); 471 vi = aj + diag[i] + 1; 472 nz = ai[i+1] - diag[i] - 1; 473 s[0] = xb[i2]; s[1] = xb[i2+1]; 474 for (j=0; j<nz; j++) { 475 idx = 2*vi[j]; 476 it = 4*j; 477 xw[0] = x[idx]; xw[1] = x[1+idx]; 478 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 479 } 480 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 481 x[i2] = xw[0]; x[i2+1] = xw[1]; 482 idiag -= 4; 483 i2 -= 2; 484 } 485 break; 486 case 3: 487 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 488 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 489 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 490 i2 -= 3; 491 idiag -= 9; 492 for (i=m-2; i>=0; i--) { 493 v = aa + 9*(diag[i]+1); 494 vi = aj + diag[i] + 1; 495 nz = ai[i+1] - diag[i] - 1; 496 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 497 while (nz--) { 498 idx = 3*(*vi++); 499 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 500 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 501 v += 9; 502 } 503 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 504 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 505 idiag -= 9; 506 i2 -= 3; 507 } 508 break; 509 case 4: 510 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 511 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 512 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 513 i2 -= 4; 514 idiag -= 16; 515 for (i=m-2; i>=0; i--) { 516 v = aa + 16*(diag[i]+1); 517 vi = aj + diag[i] + 1; 518 nz = ai[i+1] - diag[i] - 1; 519 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 520 while (nz--) { 521 idx = 4*(*vi++); 522 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 523 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 524 v += 16; 525 } 526 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 527 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 528 idiag -= 16; 529 i2 -= 4; 530 } 531 break; 532 case 5: 533 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 534 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 535 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 536 i2 -= 5; 537 idiag -= 25; 538 for (i=m-2; i>=0; i--) { 539 v = aa + 25*(diag[i]+1); 540 vi = aj + diag[i] + 1; 541 nz = ai[i+1] - diag[i] - 1; 542 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 543 while (nz--) { 544 idx = 5*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 546 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 547 v += 25; 548 } 549 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 550 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 551 idiag -= 25; 552 i2 -= 5; 553 } 554 break; 555 case 6: 556 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 557 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 558 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 559 i2 -= 6; 560 idiag -= 36; 561 for (i=m-2; i>=0; i--) { 562 v = aa + 36*(diag[i]+1); 563 vi = aj + diag[i] + 1; 564 nz = ai[i+1] - diag[i] - 1; 565 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 566 while (nz--) { 567 idx = 6*(*vi++); 568 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 569 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 570 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 571 v += 36; 572 } 573 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 574 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 575 idiag -= 36; 576 i2 -= 6; 577 } 578 break; 579 case 7: 580 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 581 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 582 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 583 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 584 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 585 i2 -= 7; 586 idiag -= 49; 587 for (i=m-2; i>=0; i--) { 588 v = aa + 49*(diag[i]+1); 589 vi = aj + diag[i] + 1; 590 nz = ai[i+1] - diag[i] - 1; 591 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 592 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 593 while (nz--) { 594 idx = 7*(*vi++); 595 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 596 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 597 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 598 v += 49; 599 } 600 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 601 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 602 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 603 idiag -= 49; 604 i2 -= 7; 605 } 606 break; 607 default: 608 PetscCall(PetscArraycpy(w,xb+i2,bs)); 609 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 610 i2 -= bs; 611 idiag -= bs2; 612 for (i=m-2; i>=0; i--) { 613 v = aa + bs2*(diag[i]+1); 614 vi = aj + diag[i] + 1; 615 nz = ai[i+1] - diag[i] - 1; 616 617 PetscCall(PetscArraycpy(w,xb+i2,bs)); 618 /* copy all rows of x that are needed into contiguous space */ 619 workt = work; 620 for (j=0; j<nz; j++) { 621 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 622 workt += bs; 623 } 624 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 625 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 626 627 idiag -= bs2; 628 i2 -= bs; 629 } 630 break; 631 } 632 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 633 } 634 its--; 635 } 636 while (its--) { 637 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 638 idiag = a->idiag; 639 i2 = 0; 640 switch (bs) { 641 case 1: 642 for (i=0; i<m; i++) { 643 v = aa + ai[i]; 644 vi = aj + ai[i]; 645 nz = ai[i+1] - ai[i]; 646 s[0] = b[i2]; 647 for (j=0; j<nz; j++) { 648 xw[0] = x[vi[j]]; 649 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 650 } 651 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 652 x[i2] += xw[0]; 653 idiag += 1; 654 i2 += 1; 655 } 656 break; 657 case 2: 658 for (i=0; i<m; i++) { 659 v = aa + 4*ai[i]; 660 vi = aj + ai[i]; 661 nz = ai[i+1] - ai[i]; 662 s[0] = b[i2]; s[1] = b[i2+1]; 663 for (j=0; j<nz; j++) { 664 idx = 2*vi[j]; 665 it = 4*j; 666 xw[0] = x[idx]; xw[1] = x[1+idx]; 667 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 668 } 669 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 670 x[i2] += xw[0]; x[i2+1] += xw[1]; 671 idiag += 4; 672 i2 += 2; 673 } 674 break; 675 case 3: 676 for (i=0; i<m; i++) { 677 v = aa + 9*ai[i]; 678 vi = aj + ai[i]; 679 nz = ai[i+1] - ai[i]; 680 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 681 while (nz--) { 682 idx = 3*(*vi++); 683 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 684 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 685 v += 9; 686 } 687 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 688 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 689 idiag += 9; 690 i2 += 3; 691 } 692 break; 693 case 4: 694 for (i=0; i<m; i++) { 695 v = aa + 16*ai[i]; 696 vi = aj + ai[i]; 697 nz = ai[i+1] - ai[i]; 698 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 699 while (nz--) { 700 idx = 4*(*vi++); 701 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 702 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 703 v += 16; 704 } 705 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 706 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 707 idiag += 16; 708 i2 += 4; 709 } 710 break; 711 case 5: 712 for (i=0; i<m; i++) { 713 v = aa + 25*ai[i]; 714 vi = aj + ai[i]; 715 nz = ai[i+1] - ai[i]; 716 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 717 while (nz--) { 718 idx = 5*(*vi++); 719 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 720 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 721 v += 25; 722 } 723 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 724 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 725 idiag += 25; 726 i2 += 5; 727 } 728 break; 729 case 6: 730 for (i=0; i<m; i++) { 731 v = aa + 36*ai[i]; 732 vi = aj + ai[i]; 733 nz = ai[i+1] - ai[i]; 734 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 735 while (nz--) { 736 idx = 6*(*vi++); 737 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 738 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 739 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 740 v += 36; 741 } 742 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 743 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 744 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 745 idiag += 36; 746 i2 += 6; 747 } 748 break; 749 case 7: 750 for (i=0; i<m; i++) { 751 v = aa + 49*ai[i]; 752 vi = aj + ai[i]; 753 nz = ai[i+1] - ai[i]; 754 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 755 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 756 while (nz--) { 757 idx = 7*(*vi++); 758 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 759 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 760 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 761 v += 49; 762 } 763 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 764 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 765 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 766 idiag += 49; 767 i2 += 7; 768 } 769 break; 770 default: 771 for (i=0; i<m; i++) { 772 v = aa + bs2*ai[i]; 773 vi = aj + ai[i]; 774 nz = ai[i+1] - ai[i]; 775 776 PetscCall(PetscArraycpy(w,b+i2,bs)); 777 /* copy all rows of x that are needed into contiguous space */ 778 workt = work; 779 for (j=0; j<nz; j++) { 780 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 781 workt += bs; 782 } 783 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 784 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 785 786 idiag += bs2; 787 i2 += bs; 788 } 789 break; 790 } 791 PetscCall(PetscLogFlops(2.0*bs2*a->nz)); 792 } 793 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 794 idiag = a->idiag+bs2*(a->mbs-1); 795 i2 = bs * (m-1); 796 switch (bs) { 797 case 1: 798 for (i=m-1; i>=0; i--) { 799 v = aa + ai[i]; 800 vi = aj + ai[i]; 801 nz = ai[i+1] - ai[i]; 802 s[0] = b[i2]; 803 for (j=0; j<nz; j++) { 804 xw[0] = x[vi[j]]; 805 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 806 } 807 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 808 x[i2] += xw[0]; 809 idiag -= 1; 810 i2 -= 1; 811 } 812 break; 813 case 2: 814 for (i=m-1; i>=0; i--) { 815 v = aa + 4*ai[i]; 816 vi = aj + ai[i]; 817 nz = ai[i+1] - ai[i]; 818 s[0] = b[i2]; s[1] = b[i2+1]; 819 for (j=0; j<nz; j++) { 820 idx = 2*vi[j]; 821 it = 4*j; 822 xw[0] = x[idx]; xw[1] = x[1+idx]; 823 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 824 } 825 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 826 x[i2] += xw[0]; x[i2+1] += xw[1]; 827 idiag -= 4; 828 i2 -= 2; 829 } 830 break; 831 case 3: 832 for (i=m-1; i>=0; i--) { 833 v = aa + 9*ai[i]; 834 vi = aj + ai[i]; 835 nz = ai[i+1] - ai[i]; 836 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 837 while (nz--) { 838 idx = 3*(*vi++); 839 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 840 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 841 v += 9; 842 } 843 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 844 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 845 idiag -= 9; 846 i2 -= 3; 847 } 848 break; 849 case 4: 850 for (i=m-1; i>=0; i--) { 851 v = aa + 16*ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i+1] - ai[i]; 854 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 855 while (nz--) { 856 idx = 4*(*vi++); 857 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 858 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 859 v += 16; 860 } 861 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 862 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 863 idiag -= 16; 864 i2 -= 4; 865 } 866 break; 867 case 5: 868 for (i=m-1; i>=0; i--) { 869 v = aa + 25*ai[i]; 870 vi = aj + ai[i]; 871 nz = ai[i+1] - ai[i]; 872 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 873 while (nz--) { 874 idx = 5*(*vi++); 875 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 876 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 877 v += 25; 878 } 879 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 880 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 881 idiag -= 25; 882 i2 -= 5; 883 } 884 break; 885 case 6: 886 for (i=m-1; i>=0; i--) { 887 v = aa + 36*ai[i]; 888 vi = aj + ai[i]; 889 nz = ai[i+1] - ai[i]; 890 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 891 while (nz--) { 892 idx = 6*(*vi++); 893 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 894 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 895 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 896 v += 36; 897 } 898 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 899 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 900 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 901 idiag -= 36; 902 i2 -= 6; 903 } 904 break; 905 case 7: 906 for (i=m-1; i>=0; i--) { 907 v = aa + 49*ai[i]; 908 vi = aj + ai[i]; 909 nz = ai[i+1] - ai[i]; 910 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 911 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 912 while (nz--) { 913 idx = 7*(*vi++); 914 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 915 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 916 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 917 v += 49; 918 } 919 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 920 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 921 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 922 idiag -= 49; 923 i2 -= 7; 924 } 925 break; 926 default: 927 for (i=m-1; i>=0; i--) { 928 v = aa + bs2*ai[i]; 929 vi = aj + ai[i]; 930 nz = ai[i+1] - ai[i]; 931 932 PetscCall(PetscArraycpy(w,b+i2,bs)); 933 /* copy all rows of x that are needed into contiguous space */ 934 workt = work; 935 for (j=0; j<nz; j++) { 936 PetscCall(PetscArraycpy(workt,x + bs*(*vi++),bs)); 937 workt += bs; 938 } 939 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 940 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 941 942 idiag -= bs2; 943 i2 -= bs; 944 } 945 break; 946 } 947 PetscCall(PetscLogFlops(2.0*bs2*(a->nz))); 948 } 949 } 950 PetscCall(VecRestoreArray(xx,&x)); 951 PetscCall(VecRestoreArrayRead(bb,&b)); 952 PetscFunctionReturn(0); 953 } 954 955 /* 956 Special version for direct calls from Fortran (Used in PETSc-fun3d) 957 */ 958 #if defined(PETSC_HAVE_FORTRAN_CAPS) 959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 961 #define matsetvaluesblocked4_ matsetvaluesblocked4 962 #endif 963 964 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 965 { 966 Mat A = *AA; 967 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 968 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 969 PetscInt *ai =a->i,*ailen=a->ilen; 970 PetscInt *aj =a->j,stepval,lastcol = -1; 971 const PetscScalar *value = v; 972 MatScalar *ap,*aa = a->a,*bap; 973 974 PetscFunctionBegin; 975 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 976 stepval = (n-1)*4; 977 for (k=0; k<m; k++) { /* loop over added rows */ 978 row = im[k]; 979 rp = aj + ai[row]; 980 ap = aa + 16*ai[row]; 981 nrow = ailen[row]; 982 low = 0; 983 high = nrow; 984 for (l=0; l<n; l++) { /* loop over added columns */ 985 col = in[l]; 986 if (col <= lastcol) low = 0; 987 else high = nrow; 988 lastcol = col; 989 value = v + k*(stepval+4 + l)*4; 990 while (high-low > 7) { 991 t = (low+high)/2; 992 if (rp[t] > col) high = t; 993 else low = t; 994 } 995 for (i=low; i<high; i++) { 996 if (rp[i] > col) break; 997 if (rp[i] == col) { 998 bap = ap + 16*i; 999 for (ii=0; ii<4; ii++,value+=stepval) { 1000 for (jj=ii; jj<16; jj+=4) { 1001 bap[jj] += *value++; 1002 } 1003 } 1004 goto noinsert2; 1005 } 1006 } 1007 N = nrow++ - 1; 1008 high++; /* added new column index thus must search to one higher than before */ 1009 /* shift up all the later entries in this row */ 1010 for (ii=N; ii>=i; ii--) { 1011 rp[ii+1] = rp[ii]; 1012 PetscCallVoid(PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16)); 1013 } 1014 if (N >= i) { 1015 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1016 } 1017 rp[i] = col; 1018 bap = ap + 16*i; 1019 for (ii=0; ii<4; ii++,value+=stepval) { 1020 for (jj=ii; jj<16; jj+=4) { 1021 bap[jj] = *value++; 1022 } 1023 } 1024 noinsert2:; 1025 low = i; 1026 } 1027 ailen[row] = nrow; 1028 } 1029 PetscFunctionReturnVoid(); 1030 } 1031 1032 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1033 #define matsetvalues4_ MATSETVALUES4 1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1035 #define matsetvalues4_ matsetvalues4 1036 #endif 1037 1038 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1039 { 1040 Mat A = *AA; 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1042 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm; 1043 PetscInt *ai=a->i,*ailen=a->ilen; 1044 PetscInt *aj=a->j,brow,bcol; 1045 PetscInt ridx,cidx,lastcol = -1; 1046 MatScalar *ap,value,*aa=a->a,*bap; 1047 1048 PetscFunctionBegin; 1049 for (k=0; k<m; k++) { /* loop over added rows */ 1050 row = im[k]; brow = row/4; 1051 rp = aj + ai[brow]; 1052 ap = aa + 16*ai[brow]; 1053 nrow = ailen[brow]; 1054 low = 0; 1055 high = nrow; 1056 for (l=0; l<n; l++) { /* loop over added columns */ 1057 col = in[l]; bcol = col/4; 1058 ridx = row % 4; cidx = col % 4; 1059 value = v[l + k*n]; 1060 if (col <= lastcol) low = 0; 1061 else high = nrow; 1062 lastcol = col; 1063 while (high-low > 7) { 1064 t = (low+high)/2; 1065 if (rp[t] > bcol) high = t; 1066 else low = t; 1067 } 1068 for (i=low; i<high; i++) { 1069 if (rp[i] > bcol) break; 1070 if (rp[i] == bcol) { 1071 bap = ap + 16*i + 4*cidx + ridx; 1072 *bap += value; 1073 goto noinsert1; 1074 } 1075 } 1076 N = nrow++ - 1; 1077 high++; /* added new column thus must search to one higher than before */ 1078 /* shift up all the later entries in this row */ 1079 PetscCallVoid(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1080 PetscCallVoid(PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1))); 1081 PetscCallVoid(PetscArrayzero(ap+16*i,16)); 1082 rp[i] = bcol; 1083 ap[16*i + 4*cidx + ridx] = value; 1084 noinsert1:; 1085 low = i; 1086 } 1087 ailen[brow] = nrow; 1088 } 1089 PetscFunctionReturnVoid(); 1090 } 1091 1092 /* 1093 Checks for missing diagonals 1094 */ 1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1098 PetscInt *diag,*ii = a->i,i; 1099 1100 PetscFunctionBegin; 1101 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1102 *missing = PETSC_FALSE; 1103 if (A->rmap->n > 0 && !ii) { 1104 *missing = PETSC_TRUE; 1105 if (d) *d = 0; 1106 PetscCall(PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n")); 1107 } else { 1108 PetscInt n; 1109 n = PetscMin(a->mbs, a->nbs); 1110 diag = a->diag; 1111 for (i=0; i<n; i++) { 1112 if (diag[i] >= ii[i+1]) { 1113 *missing = PETSC_TRUE; 1114 if (d) *d = i; 1115 PetscCall(PetscInfo(A,"Matrix is missing block diagonal number %" PetscInt_FMT "\n",i)); 1116 break; 1117 } 1118 } 1119 } 1120 PetscFunctionReturn(0); 1121 } 1122 1123 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1124 { 1125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1126 PetscInt i,j,m = a->mbs; 1127 1128 PetscFunctionBegin; 1129 if (!a->diag) { 1130 PetscCall(PetscMalloc1(m,&a->diag)); 1131 PetscCall(PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt))); 1132 a->free_diag = PETSC_TRUE; 1133 } 1134 for (i=0; i<m; i++) { 1135 a->diag[i] = a->i[i+1]; 1136 for (j=a->i[i]; j<a->i[i+1]; j++) { 1137 if (a->j[j] == i) { 1138 a->diag[i] = j; 1139 break; 1140 } 1141 } 1142 } 1143 PetscFunctionReturn(0); 1144 } 1145 1146 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1147 { 1148 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1149 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1150 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1151 1152 PetscFunctionBegin; 1153 *nn = n; 1154 if (!ia) PetscFunctionReturn(0); 1155 if (symmetric) { 1156 PetscCall(MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja)); 1157 nz = tia[n]; 1158 } else { 1159 tia = a->i; tja = a->j; 1160 } 1161 1162 if (!blockcompressed && bs > 1) { 1163 (*nn) *= bs; 1164 /* malloc & create the natural set of indices */ 1165 PetscCall(PetscMalloc1((n+1)*bs,ia)); 1166 if (n) { 1167 (*ia)[0] = oshift; 1168 for (j=1; j<bs; j++) { 1169 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1170 } 1171 } 1172 1173 for (i=1; i<n; i++) { 1174 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1175 for (j=1; j<bs; j++) { 1176 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1177 } 1178 } 1179 if (n) { 1180 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1181 } 1182 1183 if (inja) { 1184 PetscCall(PetscMalloc1(nz*bs*bs,ja)); 1185 cnt = 0; 1186 for (i=0; i<n; i++) { 1187 for (j=0; j<bs; j++) { 1188 for (k=tia[i]; k<tia[i+1]; k++) { 1189 for (l=0; l<bs; l++) { 1190 (*ja)[cnt++] = bs*tja[k] + l; 1191 } 1192 } 1193 } 1194 } 1195 } 1196 1197 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1198 PetscCall(PetscFree(tia)); 1199 PetscCall(PetscFree(tja)); 1200 } 1201 } else if (oshift == 1) { 1202 if (symmetric) { 1203 nz = tia[A->rmap->n/bs]; 1204 /* add 1 to i and j indices */ 1205 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1206 *ia = tia; 1207 if (ja) { 1208 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1209 *ja = tja; 1210 } 1211 } else { 1212 nz = a->i[A->rmap->n/bs]; 1213 /* malloc space and add 1 to i and j indices */ 1214 PetscCall(PetscMalloc1(A->rmap->n/bs+1,ia)); 1215 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1216 if (ja) { 1217 PetscCall(PetscMalloc1(nz,ja)); 1218 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1219 } 1220 } 1221 } else { 1222 *ia = tia; 1223 if (ja) *ja = tja; 1224 } 1225 PetscFunctionReturn(0); 1226 } 1227 1228 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1229 { 1230 PetscFunctionBegin; 1231 if (!ia) PetscFunctionReturn(0); 1232 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1233 PetscCall(PetscFree(*ia)); 1234 if (ja) PetscCall(PetscFree(*ja)); 1235 } 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1240 { 1241 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1242 1243 PetscFunctionBegin; 1244 #if defined(PETSC_USE_LOG) 1245 PetscLogObjectState((PetscObject)A,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT,A->rmap->N,A->cmap->n,a->nz); 1246 #endif 1247 PetscCall(MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i)); 1248 PetscCall(ISDestroy(&a->row)); 1249 PetscCall(ISDestroy(&a->col)); 1250 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1251 PetscCall(PetscFree(a->idiag)); 1252 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax,a->ilen)); 1253 PetscCall(PetscFree(a->solve_work)); 1254 PetscCall(PetscFree(a->mult_work)); 1255 PetscCall(PetscFree(a->sor_workt)); 1256 PetscCall(PetscFree(a->sor_work)); 1257 PetscCall(ISDestroy(&a->icol)); 1258 PetscCall(PetscFree(a->saved_values)); 1259 PetscCall(PetscFree2(a->compressedrow.i,a->compressedrow.rindex)); 1260 1261 PetscCall(MatDestroy(&a->sbaijMat)); 1262 PetscCall(MatDestroy(&a->parent)); 1263 PetscCall(PetscFree(A->data)); 1264 1265 PetscCall(PetscObjectChangeTypeName((PetscObject)A,NULL)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL)); 1269 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL)); 1270 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL)); 1271 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL)); 1272 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL)); 1277 #if defined(PETSC_HAVE_HYPRE) 1278 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL)); 1279 #endif 1280 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL)); 1281 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL)); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1286 { 1287 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1288 1289 PetscFunctionBegin; 1290 switch (op) { 1291 case MAT_ROW_ORIENTED: 1292 a->roworiented = flg; 1293 break; 1294 case MAT_KEEP_NONZERO_PATTERN: 1295 a->keepnonzeropattern = flg; 1296 break; 1297 case MAT_NEW_NONZERO_LOCATIONS: 1298 a->nonew = (flg ? 0 : 1); 1299 break; 1300 case MAT_NEW_NONZERO_LOCATION_ERR: 1301 a->nonew = (flg ? -1 : 0); 1302 break; 1303 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1304 a->nonew = (flg ? -2 : 0); 1305 break; 1306 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1307 a->nounused = (flg ? -1 : 0); 1308 break; 1309 case MAT_FORCE_DIAGONAL_ENTRIES: 1310 case MAT_IGNORE_OFF_PROC_ENTRIES: 1311 case MAT_USE_HASH_TABLE: 1312 case MAT_SORTED_FULL: 1313 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 1314 break; 1315 case MAT_SPD: 1316 case MAT_SYMMETRIC: 1317 case MAT_STRUCTURALLY_SYMMETRIC: 1318 case MAT_HERMITIAN: 1319 case MAT_SYMMETRY_ETERNAL: 1320 case MAT_SUBMAT_SINGLEIS: 1321 case MAT_STRUCTURE_ONLY: 1322 /* These options are handled directly by MatSetOption() */ 1323 break; 1324 default: 1325 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1326 } 1327 PetscFunctionReturn(0); 1328 } 1329 1330 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1331 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1332 { 1333 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1334 MatScalar *aa_i; 1335 PetscScalar *v_i; 1336 1337 PetscFunctionBegin; 1338 bs = A->rmap->bs; 1339 bs2 = bs*bs; 1340 PetscCheck(row >= 0 && row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range", row); 1341 1342 bn = row/bs; /* Block number */ 1343 bp = row % bs; /* Block Position */ 1344 M = ai[bn+1] - ai[bn]; 1345 *nz = bs*M; 1346 1347 if (v) { 1348 *v = NULL; 1349 if (*nz) { 1350 PetscCall(PetscMalloc1(*nz,v)); 1351 for (i=0; i<M; i++) { /* for each block in the block row */ 1352 v_i = *v + i*bs; 1353 aa_i = aa + bs2*(ai[bn] + i); 1354 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1355 } 1356 } 1357 } 1358 1359 if (idx) { 1360 *idx = NULL; 1361 if (*nz) { 1362 PetscCall(PetscMalloc1(*nz,idx)); 1363 for (i=0; i<M; i++) { /* for each block in the block row */ 1364 idx_i = *idx + i*bs; 1365 itmp = bs*aj[ai[bn] + i]; 1366 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1367 } 1368 } 1369 } 1370 PetscFunctionReturn(0); 1371 } 1372 1373 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1374 { 1375 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1376 1377 PetscFunctionBegin; 1378 PetscCall(MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a)); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1383 { 1384 PetscFunctionBegin; 1385 if (nz) *nz = 0; 1386 if (idx) PetscCall(PetscFree(*idx)); 1387 if (v) PetscCall(PetscFree(*v)); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1392 { 1393 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data,*at; 1394 Mat C; 1395 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill; 1396 PetscInt bs2=a->bs2,*ati,*atj,anzj,kr; 1397 MatScalar *ata,*aa=a->a; 1398 1399 PetscFunctionBegin; 1400 PetscCall(PetscCalloc1(1+nbs,&atfill)); 1401 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1402 for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1403 1404 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&C)); 1405 PetscCall(MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N)); 1406 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 1407 PetscCall(MatSeqBAIJSetPreallocation(C,bs,0,atfill)); 1408 1409 at = (Mat_SeqBAIJ*)C->data; 1410 ati = at->i; 1411 for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i]; 1412 } else { 1413 C = *B; 1414 at = (Mat_SeqBAIJ*)C->data; 1415 ati = at->i; 1416 } 1417 1418 atj = at->j; 1419 ata = at->a; 1420 1421 /* Copy ati into atfill so we have locations of the next free space in atj */ 1422 PetscCall(PetscArraycpy(atfill,ati,nbs)); 1423 1424 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1425 for (i=0; i<mbs; i++) { 1426 anzj = ai[i+1] - ai[i]; 1427 for (j=0; j<anzj; j++) { 1428 atj[atfill[*aj]] = i; 1429 for (kr=0; kr<bs; kr++) { 1430 for (k=0; k<bs; k++) { 1431 ata[bs2*atfill[*aj]+k*bs+kr] = *aa++; 1432 } 1433 } 1434 atfill[*aj++] += 1; 1435 } 1436 } 1437 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 1438 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 1439 1440 /* Clean up temporary space and complete requests. */ 1441 PetscCall(PetscFree(atfill)); 1442 1443 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1444 PetscCall(MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs))); 1445 *B = C; 1446 } else { 1447 PetscCall(MatHeaderMerge(A,&C)); 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1453 { 1454 Mat Btrans; 1455 1456 PetscFunctionBegin; 1457 *f = PETSC_FALSE; 1458 PetscCall(MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans)); 1459 PetscCall(MatEqual_SeqBAIJ(B,Btrans,f)); 1460 PetscCall(MatDestroy(&Btrans)); 1461 PetscFunctionReturn(0); 1462 } 1463 1464 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1465 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 1466 { 1467 Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)mat->data; 1468 PetscInt header[4],M,N,m,bs,nz,cnt,i,j,k,l; 1469 PetscInt *rowlens,*colidxs; 1470 PetscScalar *matvals; 1471 1472 PetscFunctionBegin; 1473 PetscCall(PetscViewerSetUp(viewer)); 1474 1475 M = mat->rmap->N; 1476 N = mat->cmap->N; 1477 m = mat->rmap->n; 1478 bs = mat->rmap->bs; 1479 nz = bs*bs*A->nz; 1480 1481 /* write matrix header */ 1482 header[0] = MAT_FILE_CLASSID; 1483 header[1] = M; header[2] = N; header[3] = nz; 1484 PetscCall(PetscViewerBinaryWrite(viewer,header,4,PETSC_INT)); 1485 1486 /* store row lengths */ 1487 PetscCall(PetscMalloc1(m,&rowlens)); 1488 for (cnt=0, i=0; i<A->mbs; i++) 1489 for (j=0; j<bs; j++) 1490 rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]); 1491 PetscCall(PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT)); 1492 PetscCall(PetscFree(rowlens)); 1493 1494 /* store column indices */ 1495 PetscCall(PetscMalloc1(nz,&colidxs)); 1496 for (cnt=0, i=0; i<A->mbs; i++) 1497 for (k=0; k<bs; k++) 1498 for (j=A->i[i]; j<A->i[i+1]; j++) 1499 for (l=0; l<bs; l++) 1500 colidxs[cnt++] = bs*A->j[j] + l; 1501 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1502 PetscCall(PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT)); 1503 PetscCall(PetscFree(colidxs)); 1504 1505 /* store nonzero values */ 1506 PetscCall(PetscMalloc1(nz,&matvals)); 1507 for (cnt=0, i=0; i<A->mbs; i++) 1508 for (k=0; k<bs; k++) 1509 for (j=A->i[i]; j<A->i[i+1]; j++) 1510 for (l=0; l<bs; l++) 1511 matvals[cnt++] = A->a[bs*(bs*j + l) + k]; 1512 PetscCheck(cnt == nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1513 PetscCall(PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR)); 1514 PetscCall(PetscFree(matvals)); 1515 1516 /* write block size option to the viewer's .info file */ 1517 PetscCall(MatView_Binary_BlockSizes(mat,viewer)); 1518 PetscFunctionReturn(0); 1519 } 1520 1521 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer) 1522 { 1523 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1524 PetscInt i,bs = A->rmap->bs,k; 1525 1526 PetscFunctionBegin; 1527 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1528 for (i=0; i<a->mbs; i++) { 1529 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT "-%" PetscInt_FMT ":",i*bs,i*bs+bs-1)); 1530 for (k=a->i[i]; k<a->i[i+1]; k++) { 1531 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT "-%" PetscInt_FMT ") ",bs*a->j[k],bs*a->j[k]+bs-1)); 1532 } 1533 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1534 } 1535 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1540 { 1541 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1542 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1543 PetscViewerFormat format; 1544 1545 PetscFunctionBegin; 1546 if (A->structure_only) { 1547 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A,viewer)); 1548 PetscFunctionReturn(0); 1549 } 1550 1551 PetscCall(PetscViewerGetFormat(viewer,&format)); 1552 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1553 PetscCall(PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs)); 1554 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1555 const char *matname; 1556 Mat aij; 1557 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij)); 1558 PetscCall(PetscObjectGetName((PetscObject)A,&matname)); 1559 PetscCall(PetscObjectSetName((PetscObject)aij,matname)); 1560 PetscCall(MatView(aij,viewer)); 1561 PetscCall(MatDestroy(&aij)); 1562 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1563 PetscFunctionReturn(0); 1564 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1565 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1566 for (i=0; i<a->mbs; i++) { 1567 for (j=0; j<bs; j++) { 1568 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1569 for (k=a->i[i]; k<a->i[i+1]; k++) { 1570 for (l=0; l<bs; l++) { 1571 #if defined(PETSC_USE_COMPLEX) 1572 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1573 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %gi) ",bs*a->j[k]+l, 1574 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1575 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1576 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %gi) ",bs*a->j[k]+l, 1577 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1578 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1579 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1580 } 1581 #else 1582 if (a->a[bs2*k + l*bs + j] != 0.0) { 1583 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1584 } 1585 #endif 1586 } 1587 } 1588 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1589 } 1590 } 1591 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1592 } else { 1593 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_FALSE)); 1594 for (i=0; i<a->mbs; i++) { 1595 for (j=0; j<bs; j++) { 1596 PetscCall(PetscViewerASCIIPrintf(viewer,"row %" PetscInt_FMT ":",i*bs+j)); 1597 for (k=a->i[i]; k<a->i[i+1]; k++) { 1598 for (l=0; l<bs; l++) { 1599 #if defined(PETSC_USE_COMPLEX) 1600 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1601 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g + %g i) ",bs*a->j[k]+l, 1602 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1603 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1604 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g - %g i) ",bs*a->j[k]+l, 1605 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]))); 1606 } else { 1607 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]))); 1608 } 1609 #else 1610 PetscCall(PetscViewerASCIIPrintf(viewer," (%" PetscInt_FMT ", %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j])); 1611 #endif 1612 } 1613 } 1614 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 1615 } 1616 } 1617 PetscCall(PetscViewerASCIIUseTabs(viewer,PETSC_TRUE)); 1618 } 1619 PetscCall(PetscViewerFlush(viewer)); 1620 PetscFunctionReturn(0); 1621 } 1622 1623 #include <petscdraw.h> 1624 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1625 { 1626 Mat A = (Mat) Aa; 1627 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1628 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1629 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1630 MatScalar *aa; 1631 PetscViewer viewer; 1632 PetscViewerFormat format; 1633 1634 PetscFunctionBegin; 1635 PetscCall(PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer)); 1636 PetscCall(PetscViewerGetFormat(viewer,&format)); 1637 PetscCall(PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr)); 1638 1639 /* loop over matrix elements drawing boxes */ 1640 1641 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1642 PetscDrawCollectiveBegin(draw); 1643 /* Blue for negative, Cyan for zero and Red for positive */ 1644 color = PETSC_DRAW_BLUE; 1645 for (i=0,row=0; i<mbs; i++,row+=bs) { 1646 for (j=a->i[i]; j<a->i[i+1]; j++) { 1647 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1648 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1649 aa = a->a + j*bs2; 1650 for (k=0; k<bs; k++) { 1651 for (l=0; l<bs; l++) { 1652 if (PetscRealPart(*aa++) >= 0.) continue; 1653 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1654 } 1655 } 1656 } 1657 } 1658 color = PETSC_DRAW_CYAN; 1659 for (i=0,row=0; i<mbs; i++,row+=bs) { 1660 for (j=a->i[i]; j<a->i[i+1]; j++) { 1661 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1663 aa = a->a + j*bs2; 1664 for (k=0; k<bs; k++) { 1665 for (l=0; l<bs; l++) { 1666 if (PetscRealPart(*aa++) != 0.) continue; 1667 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1668 } 1669 } 1670 } 1671 } 1672 color = PETSC_DRAW_RED; 1673 for (i=0,row=0; i<mbs; i++,row+=bs) { 1674 for (j=a->i[i]; j<a->i[i+1]; j++) { 1675 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1676 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1677 aa = a->a + j*bs2; 1678 for (k=0; k<bs; k++) { 1679 for (l=0; l<bs; l++) { 1680 if (PetscRealPart(*aa++) <= 0.) continue; 1681 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1682 } 1683 } 1684 } 1685 } 1686 PetscDrawCollectiveEnd(draw); 1687 } else { 1688 /* use contour shading to indicate magnitude of values */ 1689 /* first determine max of all nonzero values */ 1690 PetscReal minv = 0.0, maxv = 0.0; 1691 PetscDraw popup; 1692 1693 for (i=0; i<a->nz*a->bs2; i++) { 1694 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1695 } 1696 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1697 PetscCall(PetscDrawGetPopup(draw,&popup)); 1698 PetscCall(PetscDrawScalePopup(popup,0.0,maxv)); 1699 1700 PetscDrawCollectiveBegin(draw); 1701 for (i=0,row=0; i<mbs; i++,row+=bs) { 1702 for (j=a->i[i]; j<a->i[i+1]; j++) { 1703 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1704 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1705 aa = a->a + j*bs2; 1706 for (k=0; k<bs; k++) { 1707 for (l=0; l<bs; l++) { 1708 MatScalar v = *aa++; 1709 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1710 PetscCall(PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color)); 1711 } 1712 } 1713 } 1714 } 1715 PetscDrawCollectiveEnd(draw); 1716 } 1717 PetscFunctionReturn(0); 1718 } 1719 1720 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1721 { 1722 PetscReal xl,yl,xr,yr,w,h; 1723 PetscDraw draw; 1724 PetscBool isnull; 1725 1726 PetscFunctionBegin; 1727 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 1728 PetscCall(PetscDrawIsNull(draw,&isnull)); 1729 if (isnull) PetscFunctionReturn(0); 1730 1731 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1732 xr += w; yr += h; xl = -w; yl = -h; 1733 PetscCall(PetscDrawSetCoordinates(draw,xl,yl,xr,yr)); 1734 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer)); 1735 PetscCall(PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A)); 1736 PetscCall(PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL)); 1737 PetscCall(PetscDrawSave(draw)); 1738 PetscFunctionReturn(0); 1739 } 1740 1741 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1742 { 1743 PetscBool iascii,isbinary,isdraw; 1744 1745 PetscFunctionBegin; 1746 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 1747 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 1748 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 1749 if (iascii) { 1750 PetscCall(MatView_SeqBAIJ_ASCII(A,viewer)); 1751 } else if (isbinary) { 1752 PetscCall(MatView_SeqBAIJ_Binary(A,viewer)); 1753 } else if (isdraw) { 1754 PetscCall(MatView_SeqBAIJ_Draw(A,viewer)); 1755 } else { 1756 Mat B; 1757 PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1758 PetscCall(MatView(B,viewer)); 1759 PetscCall(MatDestroy(&B)); 1760 } 1761 PetscFunctionReturn(0); 1762 } 1763 1764 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1765 { 1766 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1767 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1768 PetscInt *ai = a->i,*ailen = a->ilen; 1769 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1770 MatScalar *ap,*aa = a->a; 1771 1772 PetscFunctionBegin; 1773 for (k=0; k<m; k++) { /* loop over rows */ 1774 row = im[k]; brow = row/bs; 1775 if (row < 0) {v += n; continue;} /* negative row */ 1776 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " too large", row); 1777 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1778 ap = aa ? aa + bs2*ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 1779 nrow = ailen[brow]; 1780 for (l=0; l<n; l++) { /* loop over columns */ 1781 if (in[l] < 0) {v++; continue;} /* negative column */ 1782 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %" PetscInt_FMT " too large", in[l]); 1783 col = in[l]; 1784 bcol = col/bs; 1785 cidx = col%bs; 1786 ridx = row%bs; 1787 high = nrow; 1788 low = 0; /* assume unsorted */ 1789 while (high-low > 5) { 1790 t = (low+high)/2; 1791 if (rp[t] > bcol) high = t; 1792 else low = t; 1793 } 1794 for (i=low; i<high; i++) { 1795 if (rp[i] > bcol) break; 1796 if (rp[i] == bcol) { 1797 *v++ = ap[bs2*i+bs*cidx+ridx]; 1798 goto finished; 1799 } 1800 } 1801 *v++ = 0.0; 1802 finished:; 1803 } 1804 } 1805 PetscFunctionReturn(0); 1806 } 1807 1808 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1809 { 1810 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1811 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1812 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1813 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1814 PetscBool roworiented=a->roworiented; 1815 const PetscScalar *value = v; 1816 MatScalar *ap=NULL,*aa = a->a,*bap; 1817 1818 PetscFunctionBegin; 1819 if (roworiented) { 1820 stepval = (n-1)*bs; 1821 } else { 1822 stepval = (m-1)*bs; 1823 } 1824 for (k=0; k<m; k++) { /* loop over added rows */ 1825 row = im[k]; 1826 if (row < 0) continue; 1827 PetscCheck(row < a->mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT,row,a->mbs-1); 1828 rp = aj + ai[row]; 1829 if (!A->structure_only) ap = aa + bs2*ai[row]; 1830 rmax = imax[row]; 1831 nrow = ailen[row]; 1832 low = 0; 1833 high = nrow; 1834 for (l=0; l<n; l++) { /* loop over added columns */ 1835 if (in[l] < 0) continue; 1836 PetscCheck(in[l] < a->nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT,in[l],a->nbs-1); 1837 col = in[l]; 1838 if (!A->structure_only) { 1839 if (roworiented) { 1840 value = v + (k*(stepval+bs) + l)*bs; 1841 } else { 1842 value = v + (l*(stepval+bs) + k)*bs; 1843 } 1844 } 1845 if (col <= lastcol) low = 0; 1846 else high = nrow; 1847 lastcol = col; 1848 while (high-low > 7) { 1849 t = (low+high)/2; 1850 if (rp[t] > col) high = t; 1851 else low = t; 1852 } 1853 for (i=low; i<high; i++) { 1854 if (rp[i] > col) break; 1855 if (rp[i] == col) { 1856 if (A->structure_only) goto noinsert2; 1857 bap = ap + bs2*i; 1858 if (roworiented) { 1859 if (is == ADD_VALUES) { 1860 for (ii=0; ii<bs; ii++,value+=stepval) { 1861 for (jj=ii; jj<bs2; jj+=bs) { 1862 bap[jj] += *value++; 1863 } 1864 } 1865 } else { 1866 for (ii=0; ii<bs; ii++,value+=stepval) { 1867 for (jj=ii; jj<bs2; jj+=bs) { 1868 bap[jj] = *value++; 1869 } 1870 } 1871 } 1872 } else { 1873 if (is == ADD_VALUES) { 1874 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1875 for (jj=0; jj<bs; jj++) { 1876 bap[jj] += value[jj]; 1877 } 1878 bap += bs; 1879 } 1880 } else { 1881 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1882 for (jj=0; jj<bs; jj++) { 1883 bap[jj] = value[jj]; 1884 } 1885 bap += bs; 1886 } 1887 } 1888 } 1889 goto noinsert2; 1890 } 1891 } 1892 if (nonew == 1) goto noinsert2; 1893 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 1894 if (A->structure_only) { 1895 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar); 1896 } else { 1897 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1898 } 1899 N = nrow++ - 1; high++; 1900 /* shift up all the later entries in this row */ 1901 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 1902 rp[i] = col; 1903 if (!A->structure_only) { 1904 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 1905 bap = ap + bs2*i; 1906 if (roworiented) { 1907 for (ii=0; ii<bs; ii++,value+=stepval) { 1908 for (jj=ii; jj<bs2; jj+=bs) { 1909 bap[jj] = *value++; 1910 } 1911 } 1912 } else { 1913 for (ii=0; ii<bs; ii++,value+=stepval) { 1914 for (jj=0; jj<bs; jj++) { 1915 *bap++ = *value++; 1916 } 1917 } 1918 } 1919 } 1920 noinsert2:; 1921 low = i; 1922 } 1923 ailen[row] = nrow; 1924 } 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1929 { 1930 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1931 PetscInt fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax; 1932 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1933 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1934 MatScalar *aa = a->a,*ap; 1935 PetscReal ratio=0.6; 1936 1937 PetscFunctionBegin; 1938 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1939 1940 if (m) rmax = ailen[0]; 1941 for (i=1; i<mbs; i++) { 1942 /* move each row back by the amount of empty slots (fshift) before it*/ 1943 fshift += imax[i-1] - ailen[i-1]; 1944 rmax = PetscMax(rmax,ailen[i]); 1945 if (fshift) { 1946 ip = aj + ai[i]; 1947 ap = aa + bs2*ai[i]; 1948 N = ailen[i]; 1949 PetscCall(PetscArraymove(ip-fshift,ip,N)); 1950 if (!A->structure_only) { 1951 PetscCall(PetscArraymove(ap-bs2*fshift,ap,bs2*N)); 1952 } 1953 } 1954 ai[i] = ai[i-1] + ailen[i-1]; 1955 } 1956 if (mbs) { 1957 fshift += imax[mbs-1] - ailen[mbs-1]; 1958 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1959 } 1960 1961 /* reset ilen and imax for each row */ 1962 a->nonzerorowcnt = 0; 1963 if (A->structure_only) { 1964 PetscCall(PetscFree2(a->imax,a->ilen)); 1965 } else { /* !A->structure_only */ 1966 for (i=0; i<mbs; i++) { 1967 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1968 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1969 } 1970 } 1971 a->nz = ai[mbs]; 1972 1973 /* diagonals may have moved, so kill the diagonal pointers */ 1974 a->idiagvalid = PETSC_FALSE; 1975 if (fshift && a->diag) { 1976 PetscCall(PetscFree(a->diag)); 1977 PetscCall(PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt))); 1978 a->diag = NULL; 1979 } 1980 if (fshift) PetscCheck(a->nounused != -1,PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1981 PetscCall(PetscInfo(A,"Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2)); 1982 PetscCall(PetscInfo(A,"Number of mallocs during MatSetValues is %" PetscInt_FMT "\n",a->reallocs)); 1983 PetscCall(PetscInfo(A,"Most nonzeros blocks in any row is %" PetscInt_FMT "\n",rmax)); 1984 1985 A->info.mallocs += a->reallocs; 1986 a->reallocs = 0; 1987 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1988 a->rmax = rmax; 1989 1990 if (!A->structure_only) { 1991 PetscCall(MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio)); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 /* 1997 This function returns an array of flags which indicate the locations of contiguous 1998 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1999 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2000 Assume: sizes should be long enough to hold all the values. 2001 */ 2002 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 2003 { 2004 PetscInt i,j,k,row; 2005 PetscBool flg; 2006 2007 PetscFunctionBegin; 2008 for (i=0,j=0; i<n; j++) { 2009 row = idx[i]; 2010 if (row%bs!=0) { /* Not the beginning of a block */ 2011 sizes[j] = 1; 2012 i++; 2013 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2014 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2015 i++; 2016 } else { /* Beginning of the block, so check if the complete block exists */ 2017 flg = PETSC_TRUE; 2018 for (k=1; k<bs; k++) { 2019 if (row+k != idx[i+k]) { /* break in the block */ 2020 flg = PETSC_FALSE; 2021 break; 2022 } 2023 } 2024 if (flg) { /* No break in the bs */ 2025 sizes[j] = bs; 2026 i += bs; 2027 } else { 2028 sizes[j] = 1; 2029 i++; 2030 } 2031 } 2032 } 2033 *bs_max = j; 2034 PetscFunctionReturn(0); 2035 } 2036 2037 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2038 { 2039 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2040 PetscInt i,j,k,count,*rows; 2041 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2042 PetscScalar zero = 0.0; 2043 MatScalar *aa; 2044 const PetscScalar *xx; 2045 PetscScalar *bb; 2046 2047 PetscFunctionBegin; 2048 /* fix right hand side if needed */ 2049 if (x && b) { 2050 PetscCall(VecGetArrayRead(x,&xx)); 2051 PetscCall(VecGetArray(b,&bb)); 2052 for (i=0; i<is_n; i++) { 2053 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2054 } 2055 PetscCall(VecRestoreArrayRead(x,&xx)); 2056 PetscCall(VecRestoreArray(b,&bb)); 2057 } 2058 2059 /* Make a copy of the IS and sort it */ 2060 /* allocate memory for rows,sizes */ 2061 PetscCall(PetscMalloc2(is_n,&rows,2*is_n,&sizes)); 2062 2063 /* copy IS values to rows, and sort them */ 2064 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2065 PetscCall(PetscSortInt(is_n,rows)); 2066 2067 if (baij->keepnonzeropattern) { 2068 for (i=0; i<is_n; i++) sizes[i] = 1; 2069 bs_max = is_n; 2070 } else { 2071 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max)); 2072 A->nonzerostate++; 2073 } 2074 2075 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2076 row = rows[j]; 2077 PetscCheck(row >= 0 && row <= A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",row); 2078 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2079 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2080 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2081 if (diag != (PetscScalar)0.0) { 2082 if (baij->ilen[row/bs] > 0) { 2083 baij->ilen[row/bs] = 1; 2084 baij->j[baij->i[row/bs]] = row/bs; 2085 2086 PetscCall(PetscArrayzero(aa,count*bs)); 2087 } 2088 /* Now insert all the diagonal values for this bs */ 2089 for (k=0; k<bs; k++) { 2090 PetscCall((*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES)); 2091 } 2092 } else { /* (diag == 0.0) */ 2093 baij->ilen[row/bs] = 0; 2094 } /* end (diag == 0.0) */ 2095 } else { /* (sizes[i] != bs) */ 2096 PetscAssert(sizes[i] == 1,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2097 for (k=0; k<count; k++) { 2098 aa[0] = zero; 2099 aa += bs; 2100 } 2101 if (diag != (PetscScalar)0.0) { 2102 PetscCall((*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES)); 2103 } 2104 } 2105 } 2106 2107 PetscCall(PetscFree2(rows,sizes)); 2108 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2109 PetscFunctionReturn(0); 2110 } 2111 2112 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2113 { 2114 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2115 PetscInt i,j,k,count; 2116 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2117 PetscScalar zero = 0.0; 2118 MatScalar *aa; 2119 const PetscScalar *xx; 2120 PetscScalar *bb; 2121 PetscBool *zeroed,vecs = PETSC_FALSE; 2122 2123 PetscFunctionBegin; 2124 /* fix right hand side if needed */ 2125 if (x && b) { 2126 PetscCall(VecGetArrayRead(x,&xx)); 2127 PetscCall(VecGetArray(b,&bb)); 2128 vecs = PETSC_TRUE; 2129 } 2130 2131 /* zero the columns */ 2132 PetscCall(PetscCalloc1(A->rmap->n,&zeroed)); 2133 for (i=0; i<is_n; i++) { 2134 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %" PetscInt_FMT " out of range",is_idx[i]); 2135 zeroed[is_idx[i]] = PETSC_TRUE; 2136 } 2137 for (i=0; i<A->rmap->N; i++) { 2138 if (!zeroed[i]) { 2139 row = i/bs; 2140 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2141 for (k=0; k<bs; k++) { 2142 col = bs*baij->j[j] + k; 2143 if (zeroed[col]) { 2144 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2145 if (vecs) bb[i] -= aa[0]*xx[col]; 2146 aa[0] = 0.0; 2147 } 2148 } 2149 } 2150 } else if (vecs) bb[i] = diag*xx[i]; 2151 } 2152 PetscCall(PetscFree(zeroed)); 2153 if (vecs) { 2154 PetscCall(VecRestoreArrayRead(x,&xx)); 2155 PetscCall(VecRestoreArray(b,&bb)); 2156 } 2157 2158 /* zero the rows */ 2159 for (i=0; i<is_n; i++) { 2160 row = is_idx[i]; 2161 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2162 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2163 for (k=0; k<count; k++) { 2164 aa[0] = zero; 2165 aa += bs; 2166 } 2167 if (diag != (PetscScalar)0.0) { 2168 PetscCall((*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES)); 2169 } 2170 } 2171 PetscCall(MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY)); 2172 PetscFunctionReturn(0); 2173 } 2174 2175 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2176 { 2177 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2178 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2179 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2180 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2181 PetscInt ridx,cidx,bs2=a->bs2; 2182 PetscBool roworiented=a->roworiented; 2183 MatScalar *ap=NULL,value=0.0,*aa=a->a,*bap; 2184 2185 PetscFunctionBegin; 2186 for (k=0; k<m; k++) { /* loop over added rows */ 2187 row = im[k]; 2188 brow = row/bs; 2189 if (row < 0) continue; 2190 PetscCheck(row < A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,row,A->rmap->N-1); 2191 rp = aj + ai[brow]; 2192 if (!A->structure_only) ap = aa + bs2*ai[brow]; 2193 rmax = imax[brow]; 2194 nrow = ailen[brow]; 2195 low = 0; 2196 high = nrow; 2197 for (l=0; l<n; l++) { /* loop over added columns */ 2198 if (in[l] < 0) continue; 2199 PetscCheck(in[l] < A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[l],A->cmap->n-1); 2200 col = in[l]; bcol = col/bs; 2201 ridx = row % bs; cidx = col % bs; 2202 if (!A->structure_only) { 2203 if (roworiented) { 2204 value = v[l + k*n]; 2205 } else { 2206 value = v[k + l*m]; 2207 } 2208 } 2209 if (col <= lastcol) low = 0; else high = nrow; 2210 lastcol = col; 2211 while (high-low > 7) { 2212 t = (low+high)/2; 2213 if (rp[t] > bcol) high = t; 2214 else low = t; 2215 } 2216 for (i=low; i<high; i++) { 2217 if (rp[i] > bcol) break; 2218 if (rp[i] == bcol) { 2219 bap = ap + bs2*i + bs*cidx + ridx; 2220 if (!A->structure_only) { 2221 if (is == ADD_VALUES) *bap += value; 2222 else *bap = value; 2223 } 2224 goto noinsert1; 2225 } 2226 } 2227 if (nonew == 1) goto noinsert1; 2228 PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2229 if (A->structure_only) { 2230 MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar); 2231 } else { 2232 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2233 } 2234 N = nrow++ - 1; high++; 2235 /* shift up all the later entries in this row */ 2236 PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1)); 2237 rp[i] = bcol; 2238 if (!A->structure_only) { 2239 PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1))); 2240 PetscCall(PetscArrayzero(ap+bs2*i,bs2)); 2241 ap[bs2*i + bs*cidx + ridx] = value; 2242 } 2243 a->nz++; 2244 A->nonzerostate++; 2245 noinsert1:; 2246 low = i; 2247 } 2248 ailen[brow] = nrow; 2249 } 2250 PetscFunctionReturn(0); 2251 } 2252 2253 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2254 { 2255 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2256 Mat outA; 2257 PetscBool row_identity,col_identity; 2258 2259 PetscFunctionBegin; 2260 PetscCheck(info->levels == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2261 PetscCall(ISIdentity(row,&row_identity)); 2262 PetscCall(ISIdentity(col,&col_identity)); 2263 PetscCheck(row_identity && col_identity,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2264 2265 outA = inA; 2266 inA->factortype = MAT_FACTOR_LU; 2267 PetscCall(PetscFree(inA->solvertype)); 2268 PetscCall(PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype)); 2269 2270 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2271 2272 PetscCall(PetscObjectReference((PetscObject)row)); 2273 PetscCall(ISDestroy(&a->row)); 2274 a->row = row; 2275 PetscCall(PetscObjectReference((PetscObject)col)); 2276 PetscCall(ISDestroy(&a->col)); 2277 a->col = col; 2278 2279 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2280 PetscCall(ISDestroy(&a->icol)); 2281 PetscCall(ISInvertPermutation(col,PETSC_DECIDE,&a->icol)); 2282 PetscCall(PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol)); 2283 2284 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity))); 2285 if (!a->solve_work) { 2286 PetscCall(PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work)); 2287 PetscCall(PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar))); 2288 } 2289 PetscCall(MatLUFactorNumeric(outA,inA,info)); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2294 { 2295 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2296 PetscInt i,nz,mbs; 2297 2298 PetscFunctionBegin; 2299 nz = baij->maxnz; 2300 mbs = baij->mbs; 2301 for (i=0; i<nz; i++) { 2302 baij->j[i] = indices[i]; 2303 } 2304 baij->nz = nz; 2305 for (i=0; i<mbs; i++) { 2306 baij->ilen[i] = baij->imax[i]; 2307 } 2308 PetscFunctionReturn(0); 2309 } 2310 2311 /*@ 2312 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2313 in the matrix. 2314 2315 Input Parameters: 2316 + mat - the SeqBAIJ matrix 2317 - indices - the column indices 2318 2319 Level: advanced 2320 2321 Notes: 2322 This can be called if you have precomputed the nonzero structure of the 2323 matrix and want to provide it to the matrix object to improve the performance 2324 of the MatSetValues() operation. 2325 2326 You MUST have set the correct numbers of nonzeros per row in the call to 2327 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2328 2329 MUST be called before any calls to MatSetValues(); 2330 2331 @*/ 2332 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2333 { 2334 PetscFunctionBegin; 2335 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2336 PetscValidIntPointer(indices,2); 2337 PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices)); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2342 { 2343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2344 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2345 PetscReal atmp; 2346 PetscScalar *x,zero = 0.0; 2347 MatScalar *aa; 2348 PetscInt ncols,brow,krow,kcol; 2349 2350 PetscFunctionBegin; 2351 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2352 PetscCheck(!A->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2353 bs = A->rmap->bs; 2354 aa = a->a; 2355 ai = a->i; 2356 aj = a->j; 2357 mbs = a->mbs; 2358 2359 PetscCall(VecSet(v,zero)); 2360 PetscCall(VecGetArray(v,&x)); 2361 PetscCall(VecGetLocalSize(v,&n)); 2362 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2363 for (i=0; i<mbs; i++) { 2364 ncols = ai[1] - ai[0]; ai++; 2365 brow = bs*i; 2366 for (j=0; j<ncols; j++) { 2367 for (kcol=0; kcol<bs; kcol++) { 2368 for (krow=0; krow<bs; krow++) { 2369 atmp = PetscAbsScalar(*aa);aa++; 2370 row = brow + krow; /* row index */ 2371 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2372 } 2373 } 2374 aj++; 2375 } 2376 } 2377 PetscCall(VecRestoreArray(v,&x)); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2382 { 2383 PetscFunctionBegin; 2384 /* If the two matrices have the same copy implementation, use fast copy. */ 2385 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2387 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2388 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2389 2390 PetscCheck(a->i[ambs] == b->i[bmbs],PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",a->i[ambs],b->i[bmbs]); 2391 PetscCheck(abs == bbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different",abs,bbs); 2392 PetscCall(PetscArraycpy(b->a,a->a,bs2*a->i[ambs])); 2393 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2394 } else { 2395 PetscCall(MatCopy_Basic(A,B,str)); 2396 } 2397 PetscFunctionReturn(0); 2398 } 2399 2400 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2401 { 2402 PetscFunctionBegin; 2403 PetscCall(MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL)); 2404 PetscFunctionReturn(0); 2405 } 2406 2407 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2408 { 2409 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2410 2411 PetscFunctionBegin; 2412 *array = a->a; 2413 PetscFunctionReturn(0); 2414 } 2415 2416 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2417 { 2418 PetscFunctionBegin; 2419 *array = NULL; 2420 PetscFunctionReturn(0); 2421 } 2422 2423 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2424 { 2425 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2426 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2427 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2428 2429 PetscFunctionBegin; 2430 /* Set the number of nonzeros in the new matrix */ 2431 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz)); 2432 PetscFunctionReturn(0); 2433 } 2434 2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2436 { 2437 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2438 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2439 PetscBLASInt one=1; 2440 2441 PetscFunctionBegin; 2442 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2443 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2444 if (e) { 2445 PetscCall(PetscArraycmp(x->i,y->i,x->mbs+1,&e)); 2446 if (e) { 2447 PetscCall(PetscArraycmp(x->j,y->j,x->i[x->mbs],&e)); 2448 if (e) str = SAME_NONZERO_PATTERN; 2449 } 2450 } 2451 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"MatStructure is not SAME_NONZERO_PATTERN"); 2452 } 2453 if (str == SAME_NONZERO_PATTERN) { 2454 PetscScalar alpha = a; 2455 PetscBLASInt bnz; 2456 PetscCall(PetscBLASIntCast(x->nz*bs2,&bnz)); 2457 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2458 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2459 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2460 PetscCall(MatAXPY_Basic(Y,a,X,str)); 2461 } else { 2462 Mat B; 2463 PetscInt *nnz; 2464 PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2465 PetscCall(PetscMalloc1(Y->rmap->N,&nnz)); 2466 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B)); 2467 PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name)); 2468 PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N)); 2469 PetscCall(MatSetBlockSizesFromMats(B,Y,Y)); 2470 PetscCall(MatSetType(B,(MatType) ((PetscObject)Y)->type_name)); 2471 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz)); 2472 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 2473 PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 2474 PetscCall(MatHeaderMerge(Y,&B)); 2475 PetscCall(PetscFree(nnz)); 2476 } 2477 PetscFunctionReturn(0); 2478 } 2479 2480 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2481 { 2482 #if defined(PETSC_USE_COMPLEX) 2483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2484 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2485 MatScalar *aa = a->a; 2486 2487 PetscFunctionBegin; 2488 for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]); 2489 #else 2490 PetscFunctionBegin; 2491 #endif 2492 PetscFunctionReturn(0); 2493 } 2494 2495 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2496 { 2497 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2498 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2499 MatScalar *aa = a->a; 2500 2501 PetscFunctionBegin; 2502 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2507 { 2508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2509 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2510 MatScalar *aa = a->a; 2511 2512 PetscFunctionBegin; 2513 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 /* 2518 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2519 */ 2520 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2521 { 2522 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2523 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2524 PetscInt nz = a->i[m],row,*jj,mr,col; 2525 2526 PetscFunctionBegin; 2527 *nn = n; 2528 if (!ia) PetscFunctionReturn(0); 2529 PetscCheck(!symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2530 PetscCall(PetscCalloc1(n,&collengths)); 2531 PetscCall(PetscMalloc1(n+1,&cia)); 2532 PetscCall(PetscMalloc1(nz,&cja)); 2533 jj = a->j; 2534 for (i=0; i<nz; i++) { 2535 collengths[jj[i]]++; 2536 } 2537 cia[0] = oshift; 2538 for (i=0; i<n; i++) { 2539 cia[i+1] = cia[i] + collengths[i]; 2540 } 2541 PetscCall(PetscArrayzero(collengths,n)); 2542 jj = a->j; 2543 for (row=0; row<m; row++) { 2544 mr = a->i[row+1] - a->i[row]; 2545 for (i=0; i<mr; i++) { 2546 col = *jj++; 2547 2548 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2549 } 2550 } 2551 PetscCall(PetscFree(collengths)); 2552 *ia = cia; *ja = cja; 2553 PetscFunctionReturn(0); 2554 } 2555 2556 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2557 { 2558 PetscFunctionBegin; 2559 if (!ia) PetscFunctionReturn(0); 2560 PetscCall(PetscFree(*ia)); 2561 PetscCall(PetscFree(*ja)); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 /* 2566 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2567 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2568 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2569 */ 2570 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2571 { 2572 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2573 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2574 PetscInt nz = a->i[m],row,*jj,mr,col; 2575 PetscInt *cspidx; 2576 2577 PetscFunctionBegin; 2578 *nn = n; 2579 if (!ia) PetscFunctionReturn(0); 2580 2581 PetscCall(PetscCalloc1(n,&collengths)); 2582 PetscCall(PetscMalloc1(n+1,&cia)); 2583 PetscCall(PetscMalloc1(nz,&cja)); 2584 PetscCall(PetscMalloc1(nz,&cspidx)); 2585 jj = a->j; 2586 for (i=0; i<nz; i++) { 2587 collengths[jj[i]]++; 2588 } 2589 cia[0] = oshift; 2590 for (i=0; i<n; i++) { 2591 cia[i+1] = cia[i] + collengths[i]; 2592 } 2593 PetscCall(PetscArrayzero(collengths,n)); 2594 jj = a->j; 2595 for (row=0; row<m; row++) { 2596 mr = a->i[row+1] - a->i[row]; 2597 for (i=0; i<mr; i++) { 2598 col = *jj++; 2599 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2600 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2601 } 2602 } 2603 PetscCall(PetscFree(collengths)); 2604 *ia = cia; 2605 *ja = cja; 2606 *spidx = cspidx; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2611 { 2612 PetscFunctionBegin; 2613 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done)); 2614 PetscCall(PetscFree(*spidx)); 2615 PetscFunctionReturn(0); 2616 } 2617 2618 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2619 { 2620 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2621 2622 PetscFunctionBegin; 2623 if (!Y->preallocated || !aij->nz) { 2624 PetscCall(MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL)); 2625 } 2626 PetscCall(MatShift_Basic(Y,a)); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 /* -------------------------------------------------------------------*/ 2631 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2632 MatGetRow_SeqBAIJ, 2633 MatRestoreRow_SeqBAIJ, 2634 MatMult_SeqBAIJ_N, 2635 /* 4*/ MatMultAdd_SeqBAIJ_N, 2636 MatMultTranspose_SeqBAIJ, 2637 MatMultTransposeAdd_SeqBAIJ, 2638 NULL, 2639 NULL, 2640 NULL, 2641 /* 10*/ NULL, 2642 MatLUFactor_SeqBAIJ, 2643 NULL, 2644 NULL, 2645 MatTranspose_SeqBAIJ, 2646 /* 15*/ MatGetInfo_SeqBAIJ, 2647 MatEqual_SeqBAIJ, 2648 MatGetDiagonal_SeqBAIJ, 2649 MatDiagonalScale_SeqBAIJ, 2650 MatNorm_SeqBAIJ, 2651 /* 20*/ NULL, 2652 MatAssemblyEnd_SeqBAIJ, 2653 MatSetOption_SeqBAIJ, 2654 MatZeroEntries_SeqBAIJ, 2655 /* 24*/ MatZeroRows_SeqBAIJ, 2656 NULL, 2657 NULL, 2658 NULL, 2659 NULL, 2660 /* 29*/ MatSetUp_SeqBAIJ, 2661 NULL, 2662 NULL, 2663 NULL, 2664 NULL, 2665 /* 34*/ MatDuplicate_SeqBAIJ, 2666 NULL, 2667 NULL, 2668 MatILUFactor_SeqBAIJ, 2669 NULL, 2670 /* 39*/ MatAXPY_SeqBAIJ, 2671 MatCreateSubMatrices_SeqBAIJ, 2672 MatIncreaseOverlap_SeqBAIJ, 2673 MatGetValues_SeqBAIJ, 2674 MatCopy_SeqBAIJ, 2675 /* 44*/ NULL, 2676 MatScale_SeqBAIJ, 2677 MatShift_SeqBAIJ, 2678 NULL, 2679 MatZeroRowsColumns_SeqBAIJ, 2680 /* 49*/ NULL, 2681 MatGetRowIJ_SeqBAIJ, 2682 MatRestoreRowIJ_SeqBAIJ, 2683 MatGetColumnIJ_SeqBAIJ, 2684 MatRestoreColumnIJ_SeqBAIJ, 2685 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2686 NULL, 2687 NULL, 2688 NULL, 2689 MatSetValuesBlocked_SeqBAIJ, 2690 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2691 MatDestroy_SeqBAIJ, 2692 MatView_SeqBAIJ, 2693 NULL, 2694 NULL, 2695 /* 64*/ NULL, 2696 NULL, 2697 NULL, 2698 NULL, 2699 NULL, 2700 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2701 NULL, 2702 MatConvert_Basic, 2703 NULL, 2704 NULL, 2705 /* 74*/ NULL, 2706 MatFDColoringApply_BAIJ, 2707 NULL, 2708 NULL, 2709 NULL, 2710 /* 79*/ NULL, 2711 NULL, 2712 NULL, 2713 NULL, 2714 MatLoad_SeqBAIJ, 2715 /* 84*/ NULL, 2716 NULL, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /* 89*/ NULL, 2721 NULL, 2722 NULL, 2723 NULL, 2724 NULL, 2725 /* 94*/ NULL, 2726 NULL, 2727 NULL, 2728 NULL, 2729 NULL, 2730 /* 99*/ NULL, 2731 NULL, 2732 NULL, 2733 MatConjugate_SeqBAIJ, 2734 NULL, 2735 /*104*/ NULL, 2736 MatRealPart_SeqBAIJ, 2737 MatImaginaryPart_SeqBAIJ, 2738 NULL, 2739 NULL, 2740 /*109*/ NULL, 2741 NULL, 2742 NULL, 2743 NULL, 2744 MatMissingDiagonal_SeqBAIJ, 2745 /*114*/ NULL, 2746 NULL, 2747 NULL, 2748 NULL, 2749 NULL, 2750 /*119*/ NULL, 2751 NULL, 2752 MatMultHermitianTranspose_SeqBAIJ, 2753 MatMultHermitianTransposeAdd_SeqBAIJ, 2754 NULL, 2755 /*124*/ NULL, 2756 MatGetColumnReductions_SeqBAIJ, 2757 MatInvertBlockDiagonal_SeqBAIJ, 2758 NULL, 2759 NULL, 2760 /*129*/ NULL, 2761 NULL, 2762 NULL, 2763 NULL, 2764 NULL, 2765 /*134*/ NULL, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /*139*/ MatSetBlockSizes_Default, 2771 NULL, 2772 NULL, 2773 MatFDColoringSetUp_SeqXAIJ, 2774 NULL, 2775 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 2776 MatDestroySubMatrices_SeqBAIJ, 2777 NULL, 2778 NULL, 2779 NULL, 2780 NULL 2781 }; 2782 2783 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2784 { 2785 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2786 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2787 2788 PetscFunctionBegin; 2789 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2790 2791 /* allocate space for values if not already there */ 2792 if (!aij->saved_values) { 2793 PetscCall(PetscMalloc1(nz+1,&aij->saved_values)); 2794 PetscCall(PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar))); 2795 } 2796 2797 /* copy values over */ 2798 PetscCall(PetscArraycpy(aij->saved_values,aij->a,nz)); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2803 { 2804 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2805 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2806 2807 PetscFunctionBegin; 2808 PetscCheck(aij->nonew == 1,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2809 PetscCheck(aij->saved_values,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2810 2811 /* copy values over */ 2812 PetscCall(PetscArraycpy(aij->a,aij->saved_values,nz)); 2813 PetscFunctionReturn(0); 2814 } 2815 2816 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2817 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2818 2819 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2820 { 2821 Mat_SeqBAIJ *b; 2822 PetscInt i,mbs,nbs,bs2; 2823 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2824 2825 PetscFunctionBegin; 2826 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2827 if (nz == MAT_SKIP_ALLOCATION) { 2828 skipallocation = PETSC_TRUE; 2829 nz = 0; 2830 } 2831 2832 PetscCall(MatSetBlockSize(B,PetscAbs(bs))); 2833 PetscCall(PetscLayoutSetUp(B->rmap)); 2834 PetscCall(PetscLayoutSetUp(B->cmap)); 2835 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 2836 2837 B->preallocated = PETSC_TRUE; 2838 2839 mbs = B->rmap->n/bs; 2840 nbs = B->cmap->n/bs; 2841 bs2 = bs*bs; 2842 2843 PetscCheck(mbs*bs==B->rmap->n && nbs*bs==B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,B->cmap->n,bs); 2844 2845 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2846 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %" PetscInt_FMT,nz); 2847 if (nnz) { 2848 for (i=0; i<mbs; i++) { 2849 PetscCheck(nnz[i] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,nnz[i]); 2850 PetscCheck(nnz[i] <= nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT,i,nnz[i],nbs); 2851 } 2852 } 2853 2854 b = (Mat_SeqBAIJ*)B->data; 2855 PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat"); 2856 PetscCall(PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL)); 2857 PetscOptionsEnd(); 2858 2859 if (!flg) { 2860 switch (bs) { 2861 case 1: 2862 B->ops->mult = MatMult_SeqBAIJ_1; 2863 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2864 break; 2865 case 2: 2866 B->ops->mult = MatMult_SeqBAIJ_2; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2868 break; 2869 case 3: 2870 B->ops->mult = MatMult_SeqBAIJ_3; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2872 break; 2873 case 4: 2874 B->ops->mult = MatMult_SeqBAIJ_4; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2876 break; 2877 case 5: 2878 B->ops->mult = MatMult_SeqBAIJ_5; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2880 break; 2881 case 6: 2882 B->ops->mult = MatMult_SeqBAIJ_6; 2883 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2884 break; 2885 case 7: 2886 B->ops->mult = MatMult_SeqBAIJ_7; 2887 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2888 break; 2889 case 9: 2890 { 2891 PetscInt version = 1; 2892 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2893 switch (version) { 2894 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2895 case 1: 2896 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 2897 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 2898 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2899 break; 2900 #endif 2901 default: 2902 B->ops->mult = MatMult_SeqBAIJ_N; 2903 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2904 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2905 break; 2906 } 2907 break; 2908 } 2909 case 11: 2910 B->ops->mult = MatMult_SeqBAIJ_11; 2911 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 2912 break; 2913 case 12: 2914 { 2915 PetscInt version = 1; 2916 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2917 switch (version) { 2918 case 1: 2919 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 2920 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2921 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2922 break; 2923 case 2: 2924 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 2925 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 2926 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2927 break; 2928 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 2929 case 3: 2930 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 2931 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 2932 PetscCall(PetscInfo((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2933 break; 2934 #endif 2935 default: 2936 B->ops->mult = MatMult_SeqBAIJ_N; 2937 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2938 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2939 break; 2940 } 2941 break; 2942 } 2943 case 15: 2944 { 2945 PetscInt version = 1; 2946 PetscCall(PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL)); 2947 switch (version) { 2948 case 1: 2949 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2950 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2951 break; 2952 case 2: 2953 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 2954 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2955 break; 2956 case 3: 2957 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 2958 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2959 break; 2960 case 4: 2961 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 2962 PetscCall(PetscInfo((PetscObject)B,"Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",version,bs)); 2963 break; 2964 default: 2965 B->ops->mult = MatMult_SeqBAIJ_N; 2966 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2967 break; 2968 } 2969 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2970 break; 2971 } 2972 default: 2973 B->ops->mult = MatMult_SeqBAIJ_N; 2974 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2975 PetscCall(PetscInfo((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n",bs)); 2976 break; 2977 } 2978 } 2979 B->ops->sor = MatSOR_SeqBAIJ; 2980 b->mbs = mbs; 2981 b->nbs = nbs; 2982 if (!skipallocation) { 2983 if (!b->imax) { 2984 PetscCall(PetscMalloc2(mbs,&b->imax,mbs,&b->ilen)); 2985 PetscCall(PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt))); 2986 2987 b->free_imax_ilen = PETSC_TRUE; 2988 } 2989 /* b->ilen will count nonzeros in each block row so far. */ 2990 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2991 if (!nnz) { 2992 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2993 else if (nz < 0) nz = 1; 2994 nz = PetscMin(nz,nbs); 2995 for (i=0; i<mbs; i++) b->imax[i] = nz; 2996 PetscCall(PetscIntMultError(nz,mbs,&nz)); 2997 } else { 2998 PetscInt64 nz64 = 0; 2999 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];} 3000 PetscCall(PetscIntCast(nz64,&nz)); 3001 } 3002 3003 /* allocate the matrix space */ 3004 PetscCall(MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i)); 3005 if (B->structure_only) { 3006 PetscCall(PetscMalloc1(nz,&b->j)); 3007 PetscCall(PetscMalloc1(B->rmap->N+1,&b->i)); 3008 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt))); 3009 } else { 3010 PetscInt nzbs2 = 0; 3011 PetscCall(PetscIntMultError(nz,bs2,&nzbs2)); 3012 PetscCall(PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i)); 3013 PetscCall(PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)))); 3014 PetscCall(PetscArrayzero(b->a,nz*bs2)); 3015 } 3016 PetscCall(PetscArrayzero(b->j,nz)); 3017 3018 if (B->structure_only) { 3019 b->singlemalloc = PETSC_FALSE; 3020 b->free_a = PETSC_FALSE; 3021 } else { 3022 b->singlemalloc = PETSC_TRUE; 3023 b->free_a = PETSC_TRUE; 3024 } 3025 b->free_ij = PETSC_TRUE; 3026 3027 b->i[0] = 0; 3028 for (i=1; i<mbs+1; i++) { 3029 b->i[i] = b->i[i-1] + b->imax[i-1]; 3030 } 3031 3032 } else { 3033 b->free_a = PETSC_FALSE; 3034 b->free_ij = PETSC_FALSE; 3035 } 3036 3037 b->bs2 = bs2; 3038 b->mbs = mbs; 3039 b->nz = 0; 3040 b->maxnz = nz; 3041 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 3042 B->was_assembled = PETSC_FALSE; 3043 B->assembled = PETSC_FALSE; 3044 if (realalloc) PetscCall(MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE)); 3045 PetscFunctionReturn(0); 3046 } 3047 3048 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 3049 { 3050 PetscInt i,m,nz,nz_max=0,*nnz; 3051 PetscScalar *values=NULL; 3052 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 3053 3054 PetscFunctionBegin; 3055 PetscCheck(bs >= 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs); 3056 PetscCall(PetscLayoutSetBlockSize(B->rmap,bs)); 3057 PetscCall(PetscLayoutSetBlockSize(B->cmap,bs)); 3058 PetscCall(PetscLayoutSetUp(B->rmap)); 3059 PetscCall(PetscLayoutSetUp(B->cmap)); 3060 PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs)); 3061 m = B->rmap->n/bs; 3062 3063 PetscCheck(ii[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 3064 PetscCall(PetscMalloc1(m+1, &nnz)); 3065 for (i=0; i<m; i++) { 3066 nz = ii[i+1]- ii[i]; 3067 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz); 3068 nz_max = PetscMax(nz_max, nz); 3069 nnz[i] = nz; 3070 } 3071 PetscCall(MatSeqBAIJSetPreallocation(B,bs,0,nnz)); 3072 PetscCall(PetscFree(nnz)); 3073 3074 values = (PetscScalar*)V; 3075 if (!values) { 3076 PetscCall(PetscCalloc1(bs*bs*(nz_max+1),&values)); 3077 } 3078 for (i=0; i<m; i++) { 3079 PetscInt ncols = ii[i+1] - ii[i]; 3080 const PetscInt *icols = jj + ii[i]; 3081 if (bs == 1 || !roworiented) { 3082 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3083 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES)); 3084 } else { 3085 PetscInt j; 3086 for (j=0; j<ncols; j++) { 3087 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3088 PetscCall(MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES)); 3089 } 3090 } 3091 } 3092 if (!V) PetscCall(PetscFree(values)); 3093 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 3094 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 3095 PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3096 PetscFunctionReturn(0); 3097 } 3098 3099 /*@C 3100 MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored 3101 3102 Not Collective 3103 3104 Input Parameter: 3105 . mat - a MATSEQBAIJ matrix 3106 3107 Output Parameter: 3108 . array - pointer to the data 3109 3110 Level: intermediate 3111 3112 .seealso: `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3113 @*/ 3114 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array) 3115 { 3116 PetscFunctionBegin; 3117 PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array)); 3118 PetscFunctionReturn(0); 3119 } 3120 3121 /*@C 3122 MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray() 3123 3124 Not Collective 3125 3126 Input Parameters: 3127 + mat - a MATSEQBAIJ matrix 3128 - array - pointer to the data 3129 3130 Level: intermediate 3131 3132 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3133 @*/ 3134 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array) 3135 { 3136 PetscFunctionBegin; 3137 PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array)); 3138 PetscFunctionReturn(0); 3139 } 3140 3141 /*MC 3142 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3143 block sparse compressed row format. 3144 3145 Options Database Keys: 3146 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3147 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3148 3149 Level: beginner 3150 3151 Notes: 3152 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 3153 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 3154 3155 Run with -info to see what version of the matrix-vector product is being used 3156 3157 .seealso: `MatCreateSeqBAIJ()` 3158 M*/ 3159 3160 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3161 3162 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3163 { 3164 PetscMPIInt size; 3165 Mat_SeqBAIJ *b; 3166 3167 PetscFunctionBegin; 3168 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size)); 3169 PetscCheck(size == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3170 3171 PetscCall(PetscNewLog(B,&b)); 3172 B->data = (void*)b; 3173 PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps))); 3174 3175 b->row = NULL; 3176 b->col = NULL; 3177 b->icol = NULL; 3178 b->reallocs = 0; 3179 b->saved_values = NULL; 3180 3181 b->roworiented = PETSC_TRUE; 3182 b->nonew = 0; 3183 b->diag = NULL; 3184 B->spptr = NULL; 3185 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3186 b->keepnonzeropattern = PETSC_FALSE; 3187 3188 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ)); 3189 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ)); 3190 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ)); 3191 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ)); 3192 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3193 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ)); 3194 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ)); 3195 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ)); 3196 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3197 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ)); 3198 #if defined(PETSC_HAVE_HYPRE) 3199 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE)); 3200 #endif 3201 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS)); 3202 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ)); 3203 PetscFunctionReturn(0); 3204 } 3205 3206 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3207 { 3208 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3209 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3210 3211 PetscFunctionBegin; 3212 PetscCheck(a->i[mbs] == nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3213 3214 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3215 c->imax = a->imax; 3216 c->ilen = a->ilen; 3217 c->free_imax_ilen = PETSC_FALSE; 3218 } else { 3219 PetscCall(PetscMalloc2(mbs,&c->imax,mbs,&c->ilen)); 3220 PetscCall(PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt))); 3221 for (i=0; i<mbs; i++) { 3222 c->imax[i] = a->imax[i]; 3223 c->ilen[i] = a->ilen[i]; 3224 } 3225 c->free_imax_ilen = PETSC_TRUE; 3226 } 3227 3228 /* allocate the matrix space */ 3229 if (mallocmatspace) { 3230 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3231 PetscCall(PetscCalloc1(bs2*nz,&c->a)); 3232 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar))); 3233 3234 c->i = a->i; 3235 c->j = a->j; 3236 c->singlemalloc = PETSC_FALSE; 3237 c->free_a = PETSC_TRUE; 3238 c->free_ij = PETSC_FALSE; 3239 c->parent = A; 3240 C->preallocated = PETSC_TRUE; 3241 C->assembled = PETSC_TRUE; 3242 3243 PetscCall(PetscObjectReference((PetscObject)A)); 3244 PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3245 PetscCall(MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE)); 3246 } else { 3247 PetscCall(PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i)); 3248 PetscCall(PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt))); 3249 3250 c->singlemalloc = PETSC_TRUE; 3251 c->free_a = PETSC_TRUE; 3252 c->free_ij = PETSC_TRUE; 3253 3254 PetscCall(PetscArraycpy(c->i,a->i,mbs+1)); 3255 if (mbs > 0) { 3256 PetscCall(PetscArraycpy(c->j,a->j,nz)); 3257 if (cpvalues == MAT_COPY_VALUES) { 3258 PetscCall(PetscArraycpy(c->a,a->a,bs2*nz)); 3259 } else { 3260 PetscCall(PetscArrayzero(c->a,bs2*nz)); 3261 } 3262 } 3263 C->preallocated = PETSC_TRUE; 3264 C->assembled = PETSC_TRUE; 3265 } 3266 } 3267 3268 c->roworiented = a->roworiented; 3269 c->nonew = a->nonew; 3270 3271 PetscCall(PetscLayoutReference(A->rmap,&C->rmap)); 3272 PetscCall(PetscLayoutReference(A->cmap,&C->cmap)); 3273 3274 c->bs2 = a->bs2; 3275 c->mbs = a->mbs; 3276 c->nbs = a->nbs; 3277 3278 if (a->diag) { 3279 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3280 c->diag = a->diag; 3281 c->free_diag = PETSC_FALSE; 3282 } else { 3283 PetscCall(PetscMalloc1(mbs+1,&c->diag)); 3284 PetscCall(PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt))); 3285 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3286 c->free_diag = PETSC_TRUE; 3287 } 3288 } else c->diag = NULL; 3289 3290 c->nz = a->nz; 3291 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3292 c->solve_work = NULL; 3293 c->mult_work = NULL; 3294 c->sor_workt = NULL; 3295 c->sor_work = NULL; 3296 3297 c->compressedrow.use = a->compressedrow.use; 3298 c->compressedrow.nrows = a->compressedrow.nrows; 3299 if (a->compressedrow.use) { 3300 i = a->compressedrow.nrows; 3301 PetscCall(PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex)); 3302 PetscCall(PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt))); 3303 PetscCall(PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1)); 3304 PetscCall(PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i)); 3305 } else { 3306 c->compressedrow.use = PETSC_FALSE; 3307 c->compressedrow.i = NULL; 3308 c->compressedrow.rindex = NULL; 3309 } 3310 C->nonzerostate = A->nonzerostate; 3311 3312 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist)); 3313 PetscFunctionReturn(0); 3314 } 3315 3316 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3317 { 3318 PetscFunctionBegin; 3319 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),B)); 3320 PetscCall(MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n)); 3321 PetscCall(MatSetType(*B,MATSEQBAIJ)); 3322 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE)); 3323 PetscFunctionReturn(0); 3324 } 3325 3326 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3327 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer) 3328 { 3329 PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3330 PetscInt *rowidxs,*colidxs; 3331 PetscScalar *matvals; 3332 3333 PetscFunctionBegin; 3334 PetscCall(PetscViewerSetUp(viewer)); 3335 3336 /* read matrix header */ 3337 PetscCall(PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT)); 3338 PetscCheck(header[0] == MAT_FILE_CLASSID,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3339 M = header[1]; N = header[2]; nz = header[3]; 3340 PetscCheck(M >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 3341 PetscCheck(N >= 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%" PetscInt_FMT ") in file is negative",N); 3342 PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3343 3344 /* set block sizes from the viewer's .info file */ 3345 PetscCall(MatLoad_Binary_BlockSizes(mat,viewer)); 3346 /* set local and global sizes if not set already */ 3347 if (mat->rmap->n < 0) mat->rmap->n = M; 3348 if (mat->cmap->n < 0) mat->cmap->n = N; 3349 if (mat->rmap->N < 0) mat->rmap->N = M; 3350 if (mat->cmap->N < 0) mat->cmap->N = N; 3351 PetscCall(PetscLayoutSetUp(mat->rmap)); 3352 PetscCall(PetscLayoutSetUp(mat->cmap)); 3353 3354 /* check if the matrix sizes are correct */ 3355 PetscCall(MatGetSize(mat,&rows,&cols)); 3356 PetscCheck(M == rows && N == cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3357 PetscCall(MatGetBlockSize(mat,&bs)); 3358 PetscCall(MatGetLocalSize(mat,&m,&n)); 3359 mbs = m/bs; nbs = n/bs; 3360 3361 /* read in row lengths, column indices and nonzero values */ 3362 PetscCall(PetscMalloc1(m+1,&rowidxs)); 3363 PetscCall(PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT)); 3364 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3365 sum = rowidxs[m]; 3366 PetscCheck(sum == nz,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3367 3368 /* read in column indices and nonzero values */ 3369 PetscCall(PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals)); 3370 PetscCall(PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT)); 3371 PetscCall(PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR)); 3372 3373 { /* preallocate matrix storage */ 3374 PetscBT bt; /* helper bit set to count nonzeros */ 3375 PetscInt *nnz; 3376 PetscBool sbaij; 3377 3378 PetscCall(PetscBTCreate(nbs,&bt)); 3379 PetscCall(PetscCalloc1(mbs,&nnz)); 3380 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij)); 3381 for (i=0; i<mbs; i++) { 3382 PetscCall(PetscBTMemzero(nbs,bt)); 3383 for (k=0; k<bs; k++) { 3384 PetscInt row = bs*i + k; 3385 for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3386 PetscInt col = colidxs[j]; 3387 if (!sbaij || col >= row) 3388 if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++; 3389 } 3390 } 3391 } 3392 PetscCall(PetscBTDestroy(&bt)); 3393 PetscCall(MatSeqBAIJSetPreallocation(mat,bs,0,nnz)); 3394 PetscCall(MatSeqSBAIJSetPreallocation(mat,bs,0,nnz)); 3395 PetscCall(PetscFree(nnz)); 3396 } 3397 3398 /* store matrix values */ 3399 for (i=0; i<m; i++) { 3400 PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1]; 3401 PetscCall((*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES)); 3402 } 3403 3404 PetscCall(PetscFree(rowidxs)); 3405 PetscCall(PetscFree2(colidxs,matvals)); 3406 PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY)); 3407 PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY)); 3408 PetscFunctionReturn(0); 3409 } 3410 3411 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer) 3412 { 3413 PetscBool isbinary; 3414 3415 PetscFunctionBegin; 3416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 3417 PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3418 PetscCall(MatLoad_SeqBAIJ_Binary(mat,viewer)); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 /*@C 3423 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3424 compressed row) format. For good matrix assembly performance the 3425 user should preallocate the matrix storage by setting the parameter nz 3426 (or the array nnz). By setting these parameters accurately, performance 3427 during matrix assembly can be increased by more than a factor of 50. 3428 3429 Collective 3430 3431 Input Parameters: 3432 + comm - MPI communicator, set to PETSC_COMM_SELF 3433 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3434 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3435 . m - number of rows 3436 . n - number of columns 3437 . nz - number of nonzero blocks per block row (same for all rows) 3438 - nnz - array containing the number of nonzero blocks in the various block rows 3439 (possibly different for each block row) or NULL 3440 3441 Output Parameter: 3442 . A - the matrix 3443 3444 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3445 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3446 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3447 3448 Options Database Keys: 3449 + -mat_no_unroll - uses code that does not unroll the loops in the 3450 block calculations (much slower) 3451 - -mat_block_size - size of the blocks to use 3452 3453 Level: intermediate 3454 3455 Notes: 3456 The number of rows and columns must be divisible by blocksize. 3457 3458 If the nnz parameter is given then the nz parameter is ignored 3459 3460 A nonzero block is any block that as 1 or more nonzeros in it 3461 3462 The block AIJ format is fully compatible with standard Fortran 77 3463 storage. That is, the stored row and column indices can begin at 3464 either one (as in Fortran) or zero. See the users' manual for details. 3465 3466 Specify the preallocated storage with either nz or nnz (not both). 3467 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3468 allocation. See Users-Manual: ch_mat for details. 3469 matrices. 3470 3471 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3472 @*/ 3473 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3474 { 3475 PetscFunctionBegin; 3476 PetscCall(MatCreate(comm,A)); 3477 PetscCall(MatSetSizes(*A,m,n,m,n)); 3478 PetscCall(MatSetType(*A,MATSEQBAIJ)); 3479 PetscCall(MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz)); 3480 PetscFunctionReturn(0); 3481 } 3482 3483 /*@C 3484 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3485 per row in the matrix. For good matrix assembly performance the 3486 user should preallocate the matrix storage by setting the parameter nz 3487 (or the array nnz). By setting these parameters accurately, performance 3488 during matrix assembly can be increased by more than a factor of 50. 3489 3490 Collective 3491 3492 Input Parameters: 3493 + B - the matrix 3494 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3495 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3496 . nz - number of block nonzeros per block row (same for all rows) 3497 - nnz - array containing the number of block nonzeros in the various block rows 3498 (possibly different for each block row) or NULL 3499 3500 Options Database Keys: 3501 + -mat_no_unroll - uses code that does not unroll the loops in the 3502 block calculations (much slower) 3503 - -mat_block_size - size of the blocks to use 3504 3505 Level: intermediate 3506 3507 Notes: 3508 If the nnz parameter is given then the nz parameter is ignored 3509 3510 You can call MatGetInfo() to get information on how effective the preallocation was; 3511 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3512 You can also run with the option -info and look for messages with the string 3513 malloc in them to see if additional memory allocation was needed. 3514 3515 The block AIJ format is fully compatible with standard Fortran 77 3516 storage. That is, the stored row and column indices can begin at 3517 either one (as in Fortran) or zero. See the users' manual for details. 3518 3519 Specify the preallocated storage with either nz or nnz (not both). 3520 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3521 allocation. See Users-Manual: ch_mat for details. 3522 3523 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3524 @*/ 3525 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3526 { 3527 PetscFunctionBegin; 3528 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3529 PetscValidType(B,1); 3530 PetscValidLogicalCollectiveInt(B,bs,2); 3531 PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz)); 3532 PetscFunctionReturn(0); 3533 } 3534 3535 /*@C 3536 MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 3537 3538 Collective 3539 3540 Input Parameters: 3541 + B - the matrix 3542 . i - the indices into j for the start of each local row (starts with zero) 3543 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3544 - v - optional values in the matrix 3545 3546 Level: advanced 3547 3548 Notes: 3549 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3550 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3551 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3552 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3553 block column and the second index is over columns within a block. 3554 3555 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3556 3557 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3558 @*/ 3559 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3560 { 3561 PetscFunctionBegin; 3562 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3563 PetscValidType(B,1); 3564 PetscValidLogicalCollectiveInt(B,bs,2); 3565 PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v)); 3566 PetscFunctionReturn(0); 3567 } 3568 3569 /*@ 3570 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3571 3572 Collective 3573 3574 Input Parameters: 3575 + comm - must be an MPI communicator of size 1 3576 . bs - size of block 3577 . m - number of rows 3578 . n - number of columns 3579 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3580 . j - column indices 3581 - a - matrix values 3582 3583 Output Parameter: 3584 . mat - the matrix 3585 3586 Level: advanced 3587 3588 Notes: 3589 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3590 once the matrix is destroyed 3591 3592 You cannot set new nonzero locations into this matrix, that will generate an error. 3593 3594 The i and j indices are 0 based 3595 3596 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3597 3598 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3599 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3600 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3601 with column-major ordering within blocks. 3602 3603 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3604 3605 @*/ 3606 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat) 3607 { 3608 PetscInt ii; 3609 Mat_SeqBAIJ *baij; 3610 3611 PetscFunctionBegin; 3612 PetscCheck(bs == 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %" PetscInt_FMT " > 1 is not supported yet",bs); 3613 if (m > 0) PetscCheck(i[0] == 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3614 3615 PetscCall(MatCreate(comm,mat)); 3616 PetscCall(MatSetSizes(*mat,m,n,m,n)); 3617 PetscCall(MatSetType(*mat,MATSEQBAIJ)); 3618 PetscCall(MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL)); 3619 baij = (Mat_SeqBAIJ*)(*mat)->data; 3620 PetscCall(PetscMalloc2(m,&baij->imax,m,&baij->ilen)); 3621 PetscCall(PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt))); 3622 3623 baij->i = i; 3624 baij->j = j; 3625 baij->a = a; 3626 3627 baij->singlemalloc = PETSC_FALSE; 3628 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3629 baij->free_a = PETSC_FALSE; 3630 baij->free_ij = PETSC_FALSE; 3631 3632 for (ii=0; ii<m; ii++) { 3633 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3634 PetscCheck(i[ii+1] - i[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT,ii,i[ii+1] - i[ii]); 3635 } 3636 if (PetscDefined(USE_DEBUG)) { 3637 for (ii=0; ii<baij->i[m]; ii++) { 3638 PetscCheck(j[ii] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3639 PetscCheck(j[ii] <= n - 1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT,ii,j[ii]); 3640 } 3641 } 3642 3643 PetscCall(MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY)); 3644 PetscCall(MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY)); 3645 PetscFunctionReturn(0); 3646 } 3647 3648 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3649 { 3650 PetscMPIInt size; 3651 3652 PetscFunctionBegin; 3653 PetscCallMPI(MPI_Comm_size(comm,&size)); 3654 if (size == 1 && scall == MAT_REUSE_MATRIX) { 3655 PetscCall(MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN)); 3656 } else { 3657 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat)); 3658 } 3659 PetscFunctionReturn(0); 3660 } 3661