1 /* 2 Defines the basic matrix operations for the BAIJ (compressed row) 3 matrix storage format. 4 */ 5 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 6 #include <petscblaslapack.h> 7 #include <petsc/private/kernels/blockinvert.h> 8 #include <petsc/private/kernels/blockmatmult.h> 9 10 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 11 #define TYPE BAIJ 12 #define TYPE_BS 13 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 14 #undef TYPE_BS 15 #define TYPE_BS _BS 16 #define TYPE_BS_ON 17 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 18 #undef TYPE_BS 19 #include "../src/mat/impls/aij/seq/seqhashmat.h" 20 #undef TYPE 21 #undef TYPE_BS_ON 22 23 #if defined(PETSC_HAVE_HYPRE) 24 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 25 #endif 26 27 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 28 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 29 #endif 30 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 31 32 MatGetDiagonalMarkers(SeqBAIJ, A->rmap->bs) 33 34 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 35 { 36 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 37 PetscInt m, n, ib, jb, bs = A->rmap->bs; 38 MatScalar *a_val = a_aij->a; 39 40 PetscFunctionBegin; 41 PetscCall(MatGetSize(A, &m, &n)); 42 PetscCall(PetscArrayzero(reductions, n)); 43 if (type == NORM_2) { 44 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 45 for (jb = 0; jb < bs; jb++) { 46 for (ib = 0; ib < bs; ib++) { 47 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 48 a_val++; 49 } 50 } 51 } 52 } else if (type == NORM_1) { 53 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 54 for (jb = 0; jb < bs; jb++) { 55 for (ib = 0; ib < bs; ib++) { 56 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 57 a_val++; 58 } 59 } 60 } 61 } else if (type == NORM_INFINITY) { 62 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 63 for (jb = 0; jb < bs; jb++) { 64 for (ib = 0; ib < bs; ib++) { 65 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 66 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 67 a_val++; 68 } 69 } 70 } 71 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 72 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 73 for (jb = 0; jb < bs; jb++) { 74 for (ib = 0; ib < bs; ib++) { 75 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 76 a_val++; 77 } 78 } 79 } 80 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 81 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 82 for (jb = 0; jb < bs; jb++) { 83 for (ib = 0; ib < bs; ib++) { 84 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 85 a_val++; 86 } 87 } 88 } 89 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 90 if (type == NORM_2) { 91 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 92 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 93 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 94 } 95 PetscFunctionReturn(PETSC_SUCCESS); 96 } 97 98 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 101 PetscInt i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 102 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 103 PetscReal shift = 0.0; 104 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 105 const PetscInt *adiag; 106 107 PetscFunctionBegin; 108 allowzeropivot = PetscNot(A->erroriffailure); 109 110 if (a->idiagvalid) { 111 if (values) *values = a->idiag; 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 PetscCall(MatGetDiagonalMarkers_SeqBAIJ(A, &adiag, NULL)); 115 if (!a->idiag) PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); 116 diag = a->idiag; 117 if (values) *values = a->idiag; 118 /* factor and invert each block */ 119 switch (bs) { 120 case 1: 121 for (i = 0; i < mbs; i++) { 122 odiag = v + 1 * adiag[i]; 123 diag[0] = odiag[0]; 124 125 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 126 PetscCheck(allowzeropivot, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 127 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 128 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 129 A->factorerror_zeropivot_row = i; 130 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * adiag[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * adiag[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * adiag[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * adiag[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * adiag[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * adiag[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * adiag[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1414 { 1415 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1416 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1417 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1418 1419 PetscFunctionBegin; 1420 *nn = n; 1421 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1422 if (symmetric) { 1423 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1424 nz = tia[n]; 1425 } else { 1426 tia = a->i; 1427 tja = a->j; 1428 } 1429 1430 if (!blockcompressed && bs > 1) { 1431 (*nn) *= bs; 1432 /* malloc & create the natural set of indices */ 1433 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1434 if (n) { 1435 (*ia)[0] = oshift; 1436 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1437 } 1438 1439 for (i = 1; i < n; i++) { 1440 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1441 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1442 } 1443 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1444 1445 if (inja) { 1446 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1447 cnt = 0; 1448 for (i = 0; i < n; i++) { 1449 for (j = 0; j < bs; j++) { 1450 for (k = tia[i]; k < tia[i + 1]; k++) { 1451 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1452 } 1453 } 1454 } 1455 } 1456 1457 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1458 PetscCall(PetscFree(tia)); 1459 PetscCall(PetscFree(tja)); 1460 } 1461 } else if (oshift == 1) { 1462 if (symmetric) { 1463 nz = tia[A->rmap->n / bs]; 1464 /* add 1 to i and j indices */ 1465 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1466 *ia = tia; 1467 if (ja) { 1468 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1469 *ja = tja; 1470 } 1471 } else { 1472 nz = a->i[A->rmap->n / bs]; 1473 /* malloc space and add 1 to i and j indices */ 1474 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1475 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1476 if (ja) { 1477 PetscCall(PetscMalloc1(nz, ja)); 1478 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1479 } 1480 } 1481 } else { 1482 *ia = tia; 1483 if (ja) *ja = tja; 1484 } 1485 PetscFunctionReturn(PETSC_SUCCESS); 1486 } 1487 1488 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1489 { 1490 PetscFunctionBegin; 1491 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1492 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1493 PetscCall(PetscFree(*ia)); 1494 if (ja) PetscCall(PetscFree(*ja)); 1495 } 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1500 { 1501 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1502 1503 PetscFunctionBegin; 1504 if (A->hash_active) { 1505 PetscInt bs; 1506 A->ops[0] = a->cops; 1507 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1508 PetscCall(MatGetBlockSize(A, &bs)); 1509 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1510 PetscCall(PetscFree(a->dnz)); 1511 PetscCall(PetscFree(a->bdnz)); 1512 A->hash_active = PETSC_FALSE; 1513 } 1514 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1515 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1516 PetscCall(ISDestroy(&a->row)); 1517 PetscCall(ISDestroy(&a->col)); 1518 PetscCall(PetscFree(a->diag)); 1519 PetscCall(PetscFree(a->idiag)); 1520 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1521 PetscCall(PetscFree(a->solve_work)); 1522 PetscCall(PetscFree(a->mult_work)); 1523 PetscCall(PetscFree(a->sor_workt)); 1524 PetscCall(PetscFree(a->sor_work)); 1525 PetscCall(ISDestroy(&a->icol)); 1526 PetscCall(PetscFree(a->saved_values)); 1527 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1528 1529 PetscCall(MatDestroy(&a->sbaijMat)); 1530 PetscCall(MatDestroy(&a->parent)); 1531 PetscCall(PetscFree(A->data)); 1532 1533 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1534 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1535 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1536 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1537 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1538 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1539 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1540 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1541 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1542 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1543 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1544 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1545 #if defined(PETSC_HAVE_HYPRE) 1546 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1547 #endif 1548 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1549 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1550 PetscFunctionReturn(PETSC_SUCCESS); 1551 } 1552 1553 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1554 { 1555 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1556 1557 PetscFunctionBegin; 1558 switch (op) { 1559 case MAT_ROW_ORIENTED: 1560 a->roworiented = flg; 1561 break; 1562 case MAT_KEEP_NONZERO_PATTERN: 1563 a->keepnonzeropattern = flg; 1564 break; 1565 case MAT_NEW_NONZERO_LOCATIONS: 1566 a->nonew = (flg ? 0 : 1); 1567 break; 1568 case MAT_NEW_NONZERO_LOCATION_ERR: 1569 a->nonew = (flg ? -1 : 0); 1570 break; 1571 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1572 a->nonew = (flg ? -2 : 0); 1573 break; 1574 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1575 a->nounused = (flg ? -1 : 0); 1576 break; 1577 default: 1578 break; 1579 } 1580 PetscFunctionReturn(PETSC_SUCCESS); 1581 } 1582 1583 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1584 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1585 { 1586 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1587 MatScalar *aa_i; 1588 PetscScalar *v_i; 1589 1590 PetscFunctionBegin; 1591 bs = A->rmap->bs; 1592 bs2 = bs * bs; 1593 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1594 1595 bn = row / bs; /* Block number */ 1596 bp = row % bs; /* Block Position */ 1597 M = ai[bn + 1] - ai[bn]; 1598 *nz = bs * M; 1599 1600 if (v) { 1601 *v = NULL; 1602 if (*nz) { 1603 PetscCall(PetscMalloc1(*nz, v)); 1604 for (i = 0; i < M; i++) { /* for each block in the block row */ 1605 v_i = *v + i * bs; 1606 aa_i = aa + bs2 * (ai[bn] + i); 1607 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1608 } 1609 } 1610 } 1611 1612 if (idx) { 1613 *idx = NULL; 1614 if (*nz) { 1615 PetscCall(PetscMalloc1(*nz, idx)); 1616 for (i = 0; i < M; i++) { /* for each block in the block row */ 1617 idx_i = *idx + i * bs; 1618 itmp = bs * aj[ai[bn] + i]; 1619 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1620 } 1621 } 1622 } 1623 PetscFunctionReturn(PETSC_SUCCESS); 1624 } 1625 1626 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1627 { 1628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1629 1630 PetscFunctionBegin; 1631 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1636 { 1637 PetscFunctionBegin; 1638 if (idx) PetscCall(PetscFree(*idx)); 1639 if (v) PetscCall(PetscFree(*v)); 1640 PetscFunctionReturn(PETSC_SUCCESS); 1641 } 1642 1643 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1644 { 1645 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1646 Mat C; 1647 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1648 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1649 MatScalar *ata, *aa = a->a; 1650 1651 PetscFunctionBegin; 1652 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1653 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1654 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1655 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1656 1657 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1658 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1659 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1660 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1661 1662 at = (Mat_SeqBAIJ *)C->data; 1663 ati = at->i; 1664 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1665 } else { 1666 C = *B; 1667 at = (Mat_SeqBAIJ *)C->data; 1668 ati = at->i; 1669 } 1670 1671 atj = at->j; 1672 ata = at->a; 1673 1674 /* Copy ati into atfill so we have locations of the next free space in atj */ 1675 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1676 1677 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1678 for (i = 0; i < mbs; i++) { 1679 anzj = ai[i + 1] - ai[i]; 1680 for (j = 0; j < anzj; j++) { 1681 atj[atfill[*aj]] = i; 1682 for (kr = 0; kr < bs; kr++) { 1683 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1684 } 1685 atfill[*aj++] += 1; 1686 } 1687 } 1688 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1689 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1690 1691 /* Clean up temporary space and complete requests. */ 1692 PetscCall(PetscFree(atfill)); 1693 1694 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1695 PetscCall(MatSetBlockSizes(C, A->cmap->bs, A->rmap->bs)); 1696 *B = C; 1697 } else { 1698 PetscCall(MatHeaderMerge(A, &C)); 1699 } 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1704 { 1705 Mat Btrans; 1706 1707 PetscFunctionBegin; 1708 *f = PETSC_FALSE; 1709 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1710 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1711 PetscCall(MatDestroy(&Btrans)); 1712 PetscFunctionReturn(PETSC_SUCCESS); 1713 } 1714 1715 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1716 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1717 { 1718 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1719 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1720 PetscInt *rowlens, *colidxs; 1721 PetscScalar *matvals; 1722 1723 PetscFunctionBegin; 1724 PetscCall(PetscViewerSetUp(viewer)); 1725 1726 M = mat->rmap->N; 1727 N = mat->cmap->N; 1728 m = mat->rmap->n; 1729 bs = mat->rmap->bs; 1730 nz = bs * bs * A->nz; 1731 1732 /* write matrix header */ 1733 header[0] = MAT_FILE_CLASSID; 1734 header[1] = M; 1735 header[2] = N; 1736 header[3] = nz; 1737 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1738 1739 /* store row lengths */ 1740 PetscCall(PetscMalloc1(m, &rowlens)); 1741 for (cnt = 0, i = 0; i < A->mbs; i++) 1742 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1743 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1744 PetscCall(PetscFree(rowlens)); 1745 1746 /* store column indices */ 1747 PetscCall(PetscMalloc1(nz, &colidxs)); 1748 for (cnt = 0, i = 0; i < A->mbs; i++) 1749 for (k = 0; k < bs; k++) 1750 for (j = A->i[i]; j < A->i[i + 1]; j++) 1751 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1752 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1753 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1754 PetscCall(PetscFree(colidxs)); 1755 1756 /* store nonzero values */ 1757 PetscCall(PetscMalloc1(nz, &matvals)); 1758 for (cnt = 0, i = 0; i < A->mbs; i++) 1759 for (k = 0; k < bs; k++) 1760 for (j = A->i[i]; j < A->i[i + 1]; j++) 1761 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1762 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1763 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1764 PetscCall(PetscFree(matvals)); 1765 1766 /* write block size option to the viewer's .info file */ 1767 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1768 PetscFunctionReturn(PETSC_SUCCESS); 1769 } 1770 1771 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1772 { 1773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1774 PetscInt i, bs = A->rmap->bs, k; 1775 1776 PetscFunctionBegin; 1777 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1778 for (i = 0; i < a->mbs; i++) { 1779 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1780 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1781 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1782 } 1783 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786 1787 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1788 { 1789 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1790 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1791 PetscViewerFormat format; 1792 1793 PetscFunctionBegin; 1794 if (A->structure_only) { 1795 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1796 PetscFunctionReturn(PETSC_SUCCESS); 1797 } 1798 1799 PetscCall(PetscViewerGetFormat(viewer, &format)); 1800 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1801 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1802 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1803 const char *matname; 1804 Mat aij; 1805 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1806 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1807 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1808 PetscCall(MatView(aij, viewer)); 1809 PetscCall(MatDestroy(&aij)); 1810 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1811 PetscFunctionReturn(PETSC_SUCCESS); 1812 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1813 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1814 for (i = 0; i < a->mbs; i++) { 1815 for (j = 0; j < bs; j++) { 1816 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1817 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1818 for (l = 0; l < bs; l++) { 1819 #if defined(PETSC_USE_COMPLEX) 1820 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1821 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1822 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1823 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1824 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1825 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1826 } 1827 #else 1828 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1829 #endif 1830 } 1831 } 1832 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1833 } 1834 } 1835 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1836 } else { 1837 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1838 for (i = 0; i < a->mbs; i++) { 1839 for (j = 0; j < bs; j++) { 1840 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1841 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1842 for (l = 0; l < bs; l++) { 1843 #if defined(PETSC_USE_COMPLEX) 1844 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1845 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1846 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1847 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1848 } else { 1849 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1850 } 1851 #else 1852 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1853 #endif 1854 } 1855 } 1856 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1857 } 1858 } 1859 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1860 } 1861 PetscCall(PetscViewerFlush(viewer)); 1862 PetscFunctionReturn(PETSC_SUCCESS); 1863 } 1864 1865 #include <petscdraw.h> 1866 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1867 { 1868 Mat A = (Mat)Aa; 1869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1870 PetscInt row, i, j, k, l, mbs = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 1871 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1872 MatScalar *aa; 1873 PetscViewer viewer; 1874 PetscViewerFormat format; 1875 int color; 1876 1877 PetscFunctionBegin; 1878 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1879 PetscCall(PetscViewerGetFormat(viewer, &format)); 1880 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1881 1882 /* loop over matrix elements drawing boxes */ 1883 1884 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1885 PetscDrawCollectiveBegin(draw); 1886 /* Blue for negative, Cyan for zero and Red for positive */ 1887 color = PETSC_DRAW_BLUE; 1888 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1889 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1890 y_l = A->rmap->N - row - 1.0; 1891 y_r = y_l + 1.0; 1892 x_l = a->j[j] * bs; 1893 x_r = x_l + 1.0; 1894 aa = a->a + j * bs2; 1895 for (k = 0; k < bs; k++) { 1896 for (l = 0; l < bs; l++) { 1897 if (PetscRealPart(*aa++) >= 0.) continue; 1898 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1899 } 1900 } 1901 } 1902 } 1903 color = PETSC_DRAW_CYAN; 1904 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1905 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1906 y_l = A->rmap->N - row - 1.0; 1907 y_r = y_l + 1.0; 1908 x_l = a->j[j] * bs; 1909 x_r = x_l + 1.0; 1910 aa = a->a + j * bs2; 1911 for (k = 0; k < bs; k++) { 1912 for (l = 0; l < bs; l++) { 1913 if (PetscRealPart(*aa++) != 0.) continue; 1914 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1915 } 1916 } 1917 } 1918 } 1919 color = PETSC_DRAW_RED; 1920 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1921 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1922 y_l = A->rmap->N - row - 1.0; 1923 y_r = y_l + 1.0; 1924 x_l = a->j[j] * bs; 1925 x_r = x_l + 1.0; 1926 aa = a->a + j * bs2; 1927 for (k = 0; k < bs; k++) { 1928 for (l = 0; l < bs; l++) { 1929 if (PetscRealPart(*aa++) <= 0.) continue; 1930 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1931 } 1932 } 1933 } 1934 } 1935 PetscDrawCollectiveEnd(draw); 1936 } else { 1937 /* use contour shading to indicate magnitude of values */ 1938 /* first determine max of all nonzero values */ 1939 PetscReal minv = 0.0, maxv = 0.0; 1940 PetscDraw popup; 1941 1942 for (i = 0; i < a->nz * a->bs2; i++) { 1943 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1944 } 1945 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1946 PetscCall(PetscDrawGetPopup(draw, &popup)); 1947 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1948 1949 PetscDrawCollectiveBegin(draw); 1950 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1951 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1952 y_l = A->rmap->N - row - 1.0; 1953 y_r = y_l + 1.0; 1954 x_l = a->j[j] * bs; 1955 x_r = x_l + 1.0; 1956 aa = a->a + j * bs2; 1957 for (k = 0; k < bs; k++) { 1958 for (l = 0; l < bs; l++) { 1959 MatScalar v = *aa++; 1960 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 1961 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1962 } 1963 } 1964 } 1965 } 1966 PetscDrawCollectiveEnd(draw); 1967 } 1968 PetscFunctionReturn(PETSC_SUCCESS); 1969 } 1970 1971 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 1972 { 1973 PetscReal xl, yl, xr, yr, w, h; 1974 PetscDraw draw; 1975 PetscBool isnull; 1976 1977 PetscFunctionBegin; 1978 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1979 PetscCall(PetscDrawIsNull(draw, &isnull)); 1980 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1981 1982 xr = A->cmap->n; 1983 yr = A->rmap->N; 1984 h = yr / 10.0; 1985 w = xr / 10.0; 1986 xr += w; 1987 yr += h; 1988 xl = -w; 1989 yl = -h; 1990 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 1991 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 1992 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 1993 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 1994 PetscCall(PetscDrawSave(draw)); 1995 PetscFunctionReturn(PETSC_SUCCESS); 1996 } 1997 1998 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 1999 { 2000 PetscBool isascii, isbinary, isdraw; 2001 2002 PetscFunctionBegin; 2003 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 2004 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2005 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2006 if (isascii) { 2007 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2008 } else if (isbinary) { 2009 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2010 } else if (isdraw) { 2011 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2012 } else { 2013 Mat B; 2014 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2015 PetscCall(MatView(B, viewer)); 2016 PetscCall(MatDestroy(&B)); 2017 } 2018 PetscFunctionReturn(PETSC_SUCCESS); 2019 } 2020 2021 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2022 { 2023 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2024 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2025 PetscInt *ai = a->i, *ailen = a->ilen; 2026 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2027 MatScalar *ap, *aa = a->a; 2028 2029 PetscFunctionBegin; 2030 for (k = 0; k < m; k++) { /* loop over rows */ 2031 row = im[k]; 2032 brow = row / bs; 2033 if (row < 0) { 2034 v += n; 2035 continue; 2036 } /* negative row */ 2037 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2038 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2039 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2040 nrow = ailen[brow]; 2041 for (l = 0; l < n; l++) { /* loop over columns */ 2042 if (in[l] < 0) { 2043 v++; 2044 continue; 2045 } /* negative column */ 2046 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2047 col = in[l]; 2048 bcol = col / bs; 2049 cidx = col % bs; 2050 ridx = row % bs; 2051 high = nrow; 2052 low = 0; /* assume unsorted */ 2053 while (high - low > 5) { 2054 t = (low + high) / 2; 2055 if (rp[t] > bcol) high = t; 2056 else low = t; 2057 } 2058 for (i = low; i < high; i++) { 2059 if (rp[i] > bcol) break; 2060 if (rp[i] == bcol) { 2061 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2062 goto finished; 2063 } 2064 } 2065 *v++ = 0.0; 2066 finished:; 2067 } 2068 } 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2073 { 2074 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2075 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2076 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2077 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2078 PetscBool roworiented = a->roworiented; 2079 const PetscScalar *value = v; 2080 MatScalar *ap = NULL, *aa = a->a, *bap; 2081 2082 PetscFunctionBegin; 2083 if (roworiented) { 2084 stepval = (n - 1) * bs; 2085 } else { 2086 stepval = (m - 1) * bs; 2087 } 2088 for (k = 0; k < m; k++) { /* loop over added rows */ 2089 row = im[k]; 2090 if (row < 0) continue; 2091 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2092 rp = aj + ai[row]; 2093 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2094 rmax = imax[row]; 2095 nrow = ailen[row]; 2096 low = 0; 2097 high = nrow; 2098 for (l = 0; l < n; l++) { /* loop over added columns */ 2099 if (in[l] < 0) continue; 2100 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2101 col = in[l]; 2102 if (!A->structure_only) { 2103 if (roworiented) { 2104 value = v + (k * (stepval + bs) + l) * bs; 2105 } else { 2106 value = v + (l * (stepval + bs) + k) * bs; 2107 } 2108 } 2109 if (col <= lastcol) low = 0; 2110 else high = nrow; 2111 lastcol = col; 2112 while (high - low > 7) { 2113 t = (low + high) / 2; 2114 if (rp[t] > col) high = t; 2115 else low = t; 2116 } 2117 for (i = low; i < high; i++) { 2118 if (rp[i] > col) break; 2119 if (rp[i] == col) { 2120 if (A->structure_only) goto noinsert2; 2121 bap = ap + bs2 * i; 2122 if (roworiented) { 2123 if (is == ADD_VALUES) { 2124 for (ii = 0; ii < bs; ii++, value += stepval) { 2125 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2126 } 2127 } else { 2128 for (ii = 0; ii < bs; ii++, value += stepval) { 2129 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2130 } 2131 } 2132 } else { 2133 if (is == ADD_VALUES) { 2134 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2135 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2136 bap += bs; 2137 } 2138 } else { 2139 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2140 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2141 bap += bs; 2142 } 2143 } 2144 } 2145 goto noinsert2; 2146 } 2147 } 2148 if (nonew == 1) goto noinsert2; 2149 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2150 if (A->structure_only) { 2151 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2152 } else { 2153 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2154 } 2155 N = nrow++ - 1; 2156 high++; 2157 /* shift up all the later entries in this row */ 2158 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2159 rp[i] = col; 2160 if (!A->structure_only) { 2161 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2162 bap = ap + bs2 * i; 2163 if (roworiented) { 2164 for (ii = 0; ii < bs; ii++, value += stepval) { 2165 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2166 } 2167 } else { 2168 for (ii = 0; ii < bs; ii++, value += stepval) { 2169 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2170 } 2171 } 2172 } 2173 noinsert2:; 2174 low = i; 2175 } 2176 ailen[row] = nrow; 2177 } 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2182 { 2183 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2184 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2185 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2186 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2187 MatScalar *aa = a->a, *ap; 2188 PetscReal ratio = 0.6; 2189 2190 PetscFunctionBegin; 2191 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2192 2193 if (m) rmax = ailen[0]; 2194 for (i = 1; i < mbs; i++) { 2195 /* move each row back by the amount of empty slots (fshift) before it*/ 2196 fshift += imax[i - 1] - ailen[i - 1]; 2197 rmax = PetscMax(rmax, ailen[i]); 2198 if (fshift) { 2199 ip = aj + ai[i]; 2200 ap = aa + bs2 * ai[i]; 2201 N = ailen[i]; 2202 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2203 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2204 } 2205 ai[i] = ai[i - 1] + ailen[i - 1]; 2206 } 2207 if (mbs) { 2208 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2209 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2210 } 2211 2212 /* reset ilen and imax for each row */ 2213 a->nonzerorowcnt = 0; 2214 if (A->structure_only) { 2215 PetscCall(PetscFree2(a->imax, a->ilen)); 2216 } else { /* !A->structure_only */ 2217 for (i = 0; i < mbs; i++) { 2218 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2219 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2220 } 2221 } 2222 a->nz = ai[mbs]; 2223 2224 /* diagonals may have moved, so kill the diagonal pointers */ 2225 a->idiagvalid = PETSC_FALSE; 2226 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2227 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2228 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2229 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2230 2231 A->info.mallocs += a->reallocs; 2232 a->reallocs = 0; 2233 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2234 a->rmax = rmax; 2235 2236 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2237 PetscFunctionReturn(PETSC_SUCCESS); 2238 } 2239 2240 /* 2241 This function returns an array of flags which indicate the locations of contiguous 2242 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2243 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2244 Assume: sizes should be long enough to hold all the values. 2245 */ 2246 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2247 { 2248 PetscInt j = 0; 2249 2250 PetscFunctionBegin; 2251 for (PetscInt i = 0; i < n; j++) { 2252 PetscInt row = idx[i]; 2253 if (row % bs != 0) { /* Not the beginning of a block */ 2254 sizes[j] = 1; 2255 i++; 2256 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2257 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2258 i++; 2259 } else { /* Beginning of the block, so check if the complete block exists */ 2260 PetscBool flg = PETSC_TRUE; 2261 for (PetscInt k = 1; k < bs; k++) { 2262 if (row + k != idx[i + k]) { /* break in the block */ 2263 flg = PETSC_FALSE; 2264 break; 2265 } 2266 } 2267 if (flg) { /* No break in the bs */ 2268 sizes[j] = bs; 2269 i += bs; 2270 } else { 2271 sizes[j] = 1; 2272 i++; 2273 } 2274 } 2275 } 2276 *bs_max = j; 2277 PetscFunctionReturn(PETSC_SUCCESS); 2278 } 2279 2280 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2281 { 2282 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2283 PetscInt i, j, k, count, *rows; 2284 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2285 PetscScalar zero = 0.0; 2286 MatScalar *aa; 2287 const PetscScalar *xx; 2288 PetscScalar *bb; 2289 2290 PetscFunctionBegin; 2291 /* fix right-hand side if needed */ 2292 if (x && b) { 2293 PetscCall(VecGetArrayRead(x, &xx)); 2294 PetscCall(VecGetArray(b, &bb)); 2295 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2296 PetscCall(VecRestoreArrayRead(x, &xx)); 2297 PetscCall(VecRestoreArray(b, &bb)); 2298 } 2299 2300 /* Make a copy of the IS and sort it */ 2301 /* allocate memory for rows,sizes */ 2302 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2303 2304 /* copy IS values to rows, and sort them */ 2305 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2306 PetscCall(PetscSortInt(is_n, rows)); 2307 2308 if (baij->keepnonzeropattern) { 2309 for (i = 0; i < is_n; i++) sizes[i] = 1; 2310 bs_max = is_n; 2311 } else { 2312 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2313 A->nonzerostate++; 2314 } 2315 2316 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2317 row = rows[j]; 2318 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2319 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2320 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2321 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2322 if (diag != (PetscScalar)0.0) { 2323 if (baij->ilen[row / bs] > 0) { 2324 baij->ilen[row / bs] = 1; 2325 baij->j[baij->i[row / bs]] = row / bs; 2326 2327 PetscCall(PetscArrayzero(aa, count * bs)); 2328 } 2329 /* Now insert all the diagonal values for this bs */ 2330 for (k = 0; k < bs; k++) PetscUseTypeMethod(A, setvalues, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES); 2331 } else { /* (diag == 0.0) */ 2332 baij->ilen[row / bs] = 0; 2333 } /* end (diag == 0.0) */ 2334 } else { /* (sizes[i] != bs) */ 2335 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2336 for (k = 0; k < count; k++) { 2337 aa[0] = zero; 2338 aa += bs; 2339 } 2340 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES); 2341 } 2342 } 2343 2344 PetscCall(PetscFree2(rows, sizes)); 2345 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2346 PetscFunctionReturn(PETSC_SUCCESS); 2347 } 2348 2349 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2350 { 2351 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2352 PetscInt i, j, k, count; 2353 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2354 PetscScalar zero = 0.0; 2355 MatScalar *aa; 2356 const PetscScalar *xx; 2357 PetscScalar *bb; 2358 PetscBool *zeroed, vecs = PETSC_FALSE; 2359 2360 PetscFunctionBegin; 2361 /* fix right-hand side if needed */ 2362 if (x && b) { 2363 PetscCall(VecGetArrayRead(x, &xx)); 2364 PetscCall(VecGetArray(b, &bb)); 2365 vecs = PETSC_TRUE; 2366 } 2367 2368 /* zero the columns */ 2369 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2370 for (i = 0; i < is_n; i++) { 2371 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2372 zeroed[is_idx[i]] = PETSC_TRUE; 2373 } 2374 for (i = 0; i < A->rmap->N; i++) { 2375 if (!zeroed[i]) { 2376 row = i / bs; 2377 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2378 for (k = 0; k < bs; k++) { 2379 col = bs * baij->j[j] + k; 2380 if (zeroed[col]) { 2381 aa = baij->a + j * bs2 + (i % bs) + bs * k; 2382 if (vecs) bb[i] -= aa[0] * xx[col]; 2383 aa[0] = 0.0; 2384 } 2385 } 2386 } 2387 } else if (vecs) bb[i] = diag * xx[i]; 2388 } 2389 PetscCall(PetscFree(zeroed)); 2390 if (vecs) { 2391 PetscCall(VecRestoreArrayRead(x, &xx)); 2392 PetscCall(VecRestoreArray(b, &bb)); 2393 } 2394 2395 /* zero the rows */ 2396 for (i = 0; i < is_n; i++) { 2397 row = is_idx[i]; 2398 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2399 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 2400 for (k = 0; k < count; k++) { 2401 aa[0] = zero; 2402 aa += bs; 2403 } 2404 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2405 } 2406 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2407 PetscFunctionReturn(PETSC_SUCCESS); 2408 } 2409 2410 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2411 { 2412 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2413 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2414 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2415 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2416 PetscInt ridx, cidx, bs2 = a->bs2; 2417 PetscBool roworiented = a->roworiented; 2418 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2419 2420 PetscFunctionBegin; 2421 for (k = 0; k < m; k++) { /* loop over added rows */ 2422 row = im[k]; 2423 brow = row / bs; 2424 if (row < 0) continue; 2425 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2426 rp = PetscSafePointerPlusOffset(aj, ai[brow]); 2427 if (!A->structure_only) ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); 2428 rmax = imax[brow]; 2429 nrow = ailen[brow]; 2430 low = 0; 2431 high = nrow; 2432 for (l = 0; l < n; l++) { /* loop over added columns */ 2433 if (in[l] < 0) continue; 2434 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2435 col = in[l]; 2436 bcol = col / bs; 2437 ridx = row % bs; 2438 cidx = col % bs; 2439 if (!A->structure_only) { 2440 if (roworiented) { 2441 value = v[l + k * n]; 2442 } else { 2443 value = v[k + l * m]; 2444 } 2445 } 2446 if (col <= lastcol) low = 0; 2447 else high = nrow; 2448 lastcol = col; 2449 while (high - low > 7) { 2450 t = (low + high) / 2; 2451 if (rp[t] > bcol) high = t; 2452 else low = t; 2453 } 2454 for (i = low; i < high; i++) { 2455 if (rp[i] > bcol) break; 2456 if (rp[i] == bcol) { 2457 bap = PetscSafePointerPlusOffset(ap, bs2 * i + bs * cidx + ridx); 2458 if (!A->structure_only) { 2459 if (is == ADD_VALUES) *bap += value; 2460 else *bap = value; 2461 } 2462 goto noinsert1; 2463 } 2464 } 2465 if (nonew == 1) goto noinsert1; 2466 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2467 if (A->structure_only) { 2468 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2469 } else { 2470 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2471 } 2472 N = nrow++ - 1; 2473 high++; 2474 /* shift up all the later entries in this row */ 2475 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2476 rp[i] = bcol; 2477 if (!A->structure_only) { 2478 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2479 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2480 ap[bs2 * i + bs * cidx + ridx] = value; 2481 } 2482 a->nz++; 2483 noinsert1:; 2484 low = i; 2485 } 2486 ailen[brow] = nrow; 2487 } 2488 PetscFunctionReturn(PETSC_SUCCESS); 2489 } 2490 2491 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2492 { 2493 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2494 Mat outA; 2495 PetscBool row_identity, col_identity; 2496 2497 PetscFunctionBegin; 2498 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2499 PetscCall(ISIdentity(row, &row_identity)); 2500 PetscCall(ISIdentity(col, &col_identity)); 2501 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2502 2503 outA = inA; 2504 inA->factortype = MAT_FACTOR_LU; 2505 PetscCall(PetscFree(inA->solvertype)); 2506 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2507 2508 PetscCall(PetscObjectReference((PetscObject)row)); 2509 PetscCall(ISDestroy(&a->row)); 2510 a->row = row; 2511 PetscCall(PetscObjectReference((PetscObject)col)); 2512 PetscCall(ISDestroy(&a->col)); 2513 a->col = col; 2514 2515 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2516 PetscCall(ISDestroy(&a->icol)); 2517 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2518 2519 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2520 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2521 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2522 PetscFunctionReturn(PETSC_SUCCESS); 2523 } 2524 2525 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2526 { 2527 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2528 2529 PetscFunctionBegin; 2530 baij->nz = baij->maxnz; 2531 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2532 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2533 PetscFunctionReturn(PETSC_SUCCESS); 2534 } 2535 2536 /*@ 2537 MatSeqBAIJSetColumnIndices - Set the column indices for all the block rows in the matrix. 2538 2539 Input Parameters: 2540 + mat - the `MATSEQBAIJ` matrix 2541 - indices - the block column indices 2542 2543 Level: advanced 2544 2545 Notes: 2546 This can be called if you have precomputed the nonzero structure of the 2547 matrix and want to provide it to the matrix object to improve the performance 2548 of the `MatSetValues()` operation. 2549 2550 You MUST have set the correct numbers of nonzeros per row in the call to 2551 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2552 2553 MUST be called before any calls to `MatSetValues()` 2554 2555 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2556 @*/ 2557 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2558 { 2559 PetscFunctionBegin; 2560 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2561 PetscAssertPointer(indices, 2); 2562 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, const PetscInt *), (mat, (const PetscInt *)indices)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2567 { 2568 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2569 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2570 PetscReal atmp; 2571 PetscScalar *x, zero = 0.0; 2572 MatScalar *aa; 2573 PetscInt ncols, brow, krow, kcol; 2574 2575 PetscFunctionBegin; 2576 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2577 bs = A->rmap->bs; 2578 aa = a->a; 2579 ai = a->i; 2580 aj = a->j; 2581 mbs = a->mbs; 2582 2583 PetscCall(VecSet(v, zero)); 2584 PetscCall(VecGetArray(v, &x)); 2585 PetscCall(VecGetLocalSize(v, &n)); 2586 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2587 for (i = 0; i < mbs; i++) { 2588 ncols = ai[1] - ai[0]; 2589 ai++; 2590 brow = bs * i; 2591 for (j = 0; j < ncols; j++) { 2592 for (kcol = 0; kcol < bs; kcol++) { 2593 for (krow = 0; krow < bs; krow++) { 2594 atmp = PetscAbsScalar(*aa); 2595 aa++; 2596 row = brow + krow; /* row index */ 2597 if (PetscAbsScalar(x[row]) < atmp) { 2598 x[row] = atmp; 2599 if (idx) idx[row] = bs * (*aj) + kcol; 2600 } 2601 } 2602 } 2603 aj++; 2604 } 2605 } 2606 PetscCall(VecRestoreArray(v, &x)); 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 static PetscErrorCode MatGetRowSumAbs_SeqBAIJ(Mat A, Vec v) 2611 { 2612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2613 PetscInt i, j, n, row, bs, *ai, mbs; 2614 PetscReal atmp; 2615 PetscScalar *x, zero = 0.0; 2616 MatScalar *aa; 2617 PetscInt ncols, brow, krow, kcol; 2618 2619 PetscFunctionBegin; 2620 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2621 bs = A->rmap->bs; 2622 aa = a->a; 2623 ai = a->i; 2624 mbs = a->mbs; 2625 2626 PetscCall(VecSet(v, zero)); 2627 PetscCall(VecGetArrayWrite(v, &x)); 2628 PetscCall(VecGetLocalSize(v, &n)); 2629 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2630 for (i = 0; i < mbs; i++) { 2631 ncols = ai[1] - ai[0]; 2632 ai++; 2633 brow = bs * i; 2634 for (j = 0; j < ncols; j++) { 2635 for (kcol = 0; kcol < bs; kcol++) { 2636 for (krow = 0; krow < bs; krow++) { 2637 atmp = PetscAbsScalar(*aa); 2638 aa++; 2639 row = brow + krow; /* row index */ 2640 x[row] += atmp; 2641 } 2642 } 2643 } 2644 } 2645 PetscCall(VecRestoreArrayWrite(v, &x)); 2646 PetscFunctionReturn(PETSC_SUCCESS); 2647 } 2648 2649 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2650 { 2651 PetscFunctionBegin; 2652 /* If the two matrices have the same copy implementation, use fast copy. */ 2653 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2654 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2655 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2656 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2657 2658 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2659 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2660 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2661 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2662 } else { 2663 PetscCall(MatCopy_Basic(A, B, str)); 2664 } 2665 PetscFunctionReturn(PETSC_SUCCESS); 2666 } 2667 2668 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2669 { 2670 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2671 2672 PetscFunctionBegin; 2673 *array = a->a; 2674 PetscFunctionReturn(PETSC_SUCCESS); 2675 } 2676 2677 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2678 { 2679 PetscFunctionBegin; 2680 *array = NULL; 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2685 { 2686 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2687 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2688 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2689 2690 PetscFunctionBegin; 2691 /* Set the number of nonzeros in the new matrix */ 2692 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2697 { 2698 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2699 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2700 PetscBLASInt one = 1; 2701 2702 PetscFunctionBegin; 2703 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2704 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2705 if (e) { 2706 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2707 if (e) { 2708 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2709 if (e) str = SAME_NONZERO_PATTERN; 2710 } 2711 } 2712 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2713 } 2714 if (str == SAME_NONZERO_PATTERN) { 2715 PetscScalar alpha = a; 2716 PetscBLASInt bnz; 2717 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2718 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2719 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2720 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2721 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2722 } else { 2723 Mat B; 2724 PetscInt *nnz; 2725 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2726 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2727 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2728 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2729 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2730 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2731 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2732 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2733 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2734 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2735 PetscCall(MatHeaderMerge(Y, &B)); 2736 PetscCall(PetscFree(nnz)); 2737 } 2738 PetscFunctionReturn(PETSC_SUCCESS); 2739 } 2740 2741 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2742 { 2743 #if PetscDefined(USE_COMPLEX) 2744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2745 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2746 MatScalar *aa = a->a; 2747 2748 PetscFunctionBegin; 2749 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2750 PetscFunctionReturn(PETSC_SUCCESS); 2751 #else 2752 (void)A; 2753 return PETSC_SUCCESS; 2754 #endif 2755 } 2756 2757 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2758 { 2759 #if PetscDefined(USE_COMPLEX) 2760 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2761 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2762 MatScalar *aa = a->a; 2763 2764 PetscFunctionBegin; 2765 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 #else 2768 (void)A; 2769 return PETSC_SUCCESS; 2770 #endif 2771 } 2772 2773 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2774 { 2775 #if PetscDefined(USE_COMPLEX) 2776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2777 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2778 MatScalar *aa = a->a; 2779 2780 PetscFunctionBegin; 2781 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2782 PetscFunctionReturn(PETSC_SUCCESS); 2783 #else 2784 (void)A; 2785 return PETSC_SUCCESS; 2786 #endif 2787 } 2788 2789 /* 2790 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2791 */ 2792 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2793 { 2794 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2795 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2796 PetscInt nz = a->i[m], row, *jj, mr, col; 2797 2798 PetscFunctionBegin; 2799 *nn = n; 2800 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2801 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2802 PetscCall(PetscCalloc1(n, &collengths)); 2803 PetscCall(PetscMalloc1(n + 1, &cia)); 2804 PetscCall(PetscMalloc1(nz, &cja)); 2805 jj = a->j; 2806 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2807 cia[0] = oshift; 2808 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2809 PetscCall(PetscArrayzero(collengths, n)); 2810 jj = a->j; 2811 for (row = 0; row < m; row++) { 2812 mr = a->i[row + 1] - a->i[row]; 2813 for (i = 0; i < mr; i++) { 2814 col = *jj++; 2815 2816 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2817 } 2818 } 2819 PetscCall(PetscFree(collengths)); 2820 *ia = cia; 2821 *ja = cja; 2822 PetscFunctionReturn(PETSC_SUCCESS); 2823 } 2824 2825 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2826 { 2827 PetscFunctionBegin; 2828 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2829 PetscCall(PetscFree(*ia)); 2830 PetscCall(PetscFree(*ja)); 2831 PetscFunctionReturn(PETSC_SUCCESS); 2832 } 2833 2834 /* 2835 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2836 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2837 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2838 */ 2839 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2840 { 2841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2842 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2843 PetscInt nz = a->i[m], row, *jj, mr, col; 2844 PetscInt *cspidx; 2845 2846 PetscFunctionBegin; 2847 *nn = n; 2848 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2849 2850 PetscCall(PetscCalloc1(n, &collengths)); 2851 PetscCall(PetscMalloc1(n + 1, &cia)); 2852 PetscCall(PetscMalloc1(nz, &cja)); 2853 PetscCall(PetscMalloc1(nz, &cspidx)); 2854 jj = a->j; 2855 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2856 cia[0] = oshift; 2857 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2858 PetscCall(PetscArrayzero(collengths, n)); 2859 jj = a->j; 2860 for (row = 0; row < m; row++) { 2861 mr = a->i[row + 1] - a->i[row]; 2862 for (i = 0; i < mr; i++) { 2863 col = *jj++; 2864 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2865 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2866 } 2867 } 2868 PetscCall(PetscFree(collengths)); 2869 *ia = cia; 2870 *ja = cja; 2871 *spidx = cspidx; 2872 PetscFunctionReturn(PETSC_SUCCESS); 2873 } 2874 2875 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2876 { 2877 PetscFunctionBegin; 2878 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2879 PetscCall(PetscFree(*spidx)); 2880 PetscFunctionReturn(PETSC_SUCCESS); 2881 } 2882 2883 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2884 { 2885 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2886 2887 PetscFunctionBegin; 2888 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2889 PetscCall(MatShift_Basic(Y, a)); 2890 PetscFunctionReturn(PETSC_SUCCESS); 2891 } 2892 2893 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2894 { 2895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2896 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2897 PetscInt m = A->rmap->N, *ailen = a->ilen; 2898 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2899 MatScalar *aa = a->a, *ap; 2900 PetscBool zero; 2901 2902 PetscFunctionBegin; 2903 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2904 if (m) rmax = ailen[0]; 2905 for (i = 1; i <= mbs; i++) { 2906 for (k = ai[i - 1]; k < ai[i]; k++) { 2907 zero = PETSC_TRUE; 2908 ap = aa + bs2 * k; 2909 for (j = 0; j < bs2 && zero; j++) { 2910 if (ap[j] != 0.0) zero = PETSC_FALSE; 2911 } 2912 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2913 else { 2914 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2915 aj[k - fshift] = aj[k]; 2916 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2917 } 2918 } 2919 ai[i - 1] -= fshift_prev; 2920 fshift_prev = fshift; 2921 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2922 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2923 rmax = PetscMax(rmax, ailen[i - 1]); 2924 } 2925 if (fshift) { 2926 if (mbs) { 2927 ai[mbs] -= fshift; 2928 a->nz = ai[mbs]; 2929 } 2930 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2931 A->nonzerostate++; 2932 A->info.nz_unneeded += (PetscReal)fshift; 2933 a->rmax = rmax; 2934 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2935 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2936 } 2937 PetscFunctionReturn(PETSC_SUCCESS); 2938 } 2939 2940 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2941 MatGetRow_SeqBAIJ, 2942 MatRestoreRow_SeqBAIJ, 2943 MatMult_SeqBAIJ_N, 2944 /* 4*/ MatMultAdd_SeqBAIJ_N, 2945 MatMultTranspose_SeqBAIJ, 2946 MatMultTransposeAdd_SeqBAIJ, 2947 NULL, 2948 NULL, 2949 NULL, 2950 /* 10*/ NULL, 2951 MatLUFactor_SeqBAIJ, 2952 NULL, 2953 NULL, 2954 MatTranspose_SeqBAIJ, 2955 /* 15*/ MatGetInfo_SeqBAIJ, 2956 MatEqual_SeqBAIJ, 2957 MatGetDiagonal_SeqBAIJ, 2958 MatDiagonalScale_SeqBAIJ, 2959 MatNorm_SeqBAIJ, 2960 /* 20*/ NULL, 2961 MatAssemblyEnd_SeqBAIJ, 2962 MatSetOption_SeqBAIJ, 2963 MatZeroEntries_SeqBAIJ, 2964 /* 24*/ MatZeroRows_SeqBAIJ, 2965 NULL, 2966 NULL, 2967 NULL, 2968 NULL, 2969 /* 29*/ MatSetUp_Seq_Hash, 2970 NULL, 2971 NULL, 2972 NULL, 2973 NULL, 2974 /* 34*/ MatDuplicate_SeqBAIJ, 2975 NULL, 2976 NULL, 2977 MatILUFactor_SeqBAIJ, 2978 NULL, 2979 /* 39*/ MatAXPY_SeqBAIJ, 2980 MatCreateSubMatrices_SeqBAIJ, 2981 MatIncreaseOverlap_SeqBAIJ, 2982 MatGetValues_SeqBAIJ, 2983 MatCopy_SeqBAIJ, 2984 /* 44*/ NULL, 2985 MatScale_SeqBAIJ, 2986 MatShift_SeqBAIJ, 2987 NULL, 2988 MatZeroRowsColumns_SeqBAIJ, 2989 /* 49*/ NULL, 2990 MatGetRowIJ_SeqBAIJ, 2991 MatRestoreRowIJ_SeqBAIJ, 2992 MatGetColumnIJ_SeqBAIJ, 2993 MatRestoreColumnIJ_SeqBAIJ, 2994 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2995 NULL, 2996 NULL, 2997 NULL, 2998 MatSetValuesBlocked_SeqBAIJ, 2999 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3000 MatDestroy_SeqBAIJ, 3001 MatView_SeqBAIJ, 3002 NULL, 3003 NULL, 3004 /* 64*/ NULL, 3005 NULL, 3006 NULL, 3007 NULL, 3008 MatGetRowMaxAbs_SeqBAIJ, 3009 /* 69*/ NULL, 3010 MatConvert_Basic, 3011 NULL, 3012 MatFDColoringApply_BAIJ, 3013 NULL, 3014 /* 74*/ NULL, 3015 NULL, 3016 NULL, 3017 NULL, 3018 MatLoad_SeqBAIJ, 3019 /* 79*/ NULL, 3020 NULL, 3021 NULL, 3022 NULL, 3023 NULL, 3024 /* 84*/ NULL, 3025 NULL, 3026 NULL, 3027 NULL, 3028 NULL, 3029 /* 89*/ NULL, 3030 NULL, 3031 NULL, 3032 NULL, 3033 MatConjugate_SeqBAIJ, 3034 /* 94*/ NULL, 3035 NULL, 3036 MatRealPart_SeqBAIJ, 3037 MatImaginaryPart_SeqBAIJ, 3038 NULL, 3039 /* 99*/ NULL, 3040 NULL, 3041 NULL, 3042 NULL, 3043 NULL, 3044 /*104*/ NULL, 3045 NULL, 3046 NULL, 3047 NULL, 3048 NULL, 3049 /*109*/ NULL, 3050 NULL, 3051 MatMultHermitianTranspose_SeqBAIJ, 3052 MatMultHermitianTransposeAdd_SeqBAIJ, 3053 NULL, 3054 /*114*/ NULL, 3055 MatGetColumnReductions_SeqBAIJ, 3056 MatInvertBlockDiagonal_SeqBAIJ, 3057 NULL, 3058 NULL, 3059 /*119*/ NULL, 3060 NULL, 3061 NULL, 3062 NULL, 3063 NULL, 3064 /*124*/ NULL, 3065 NULL, 3066 MatSetBlockSizes_Default, 3067 NULL, 3068 MatFDColoringSetUp_SeqXAIJ, 3069 /*129*/ NULL, 3070 MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3071 MatDestroySubMatrices_SeqBAIJ, 3072 NULL, 3073 NULL, 3074 /*134*/ NULL, 3075 NULL, 3076 MatEliminateZeros_SeqBAIJ, 3077 MatGetRowSumAbs_SeqBAIJ, 3078 NULL, 3079 /*139*/ NULL, 3080 NULL, 3081 MatCopyHashToXAIJ_Seq_Hash, 3082 NULL, 3083 NULL}; 3084 3085 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3086 { 3087 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3088 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3089 3090 PetscFunctionBegin; 3091 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3092 3093 /* allocate space for values if not already there */ 3094 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3095 3096 /* copy values over */ 3097 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3102 { 3103 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3104 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3105 3106 PetscFunctionBegin; 3107 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3108 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3109 3110 /* copy values over */ 3111 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3112 PetscFunctionReturn(PETSC_SUCCESS); 3113 } 3114 3115 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3116 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3117 3118 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3119 { 3120 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3121 PetscInt i, mbs, nbs, bs2; 3122 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3123 3124 PetscFunctionBegin; 3125 if (B->hash_active) { 3126 PetscInt bs; 3127 B->ops[0] = b->cops; 3128 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3129 PetscCall(MatGetBlockSize(B, &bs)); 3130 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3131 PetscCall(PetscFree(b->dnz)); 3132 PetscCall(PetscFree(b->bdnz)); 3133 B->hash_active = PETSC_FALSE; 3134 } 3135 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3136 if (nz == MAT_SKIP_ALLOCATION) { 3137 skipallocation = PETSC_TRUE; 3138 nz = 0; 3139 } 3140 3141 PetscCall(MatSetBlockSize(B, bs)); 3142 PetscCall(PetscLayoutSetUp(B->rmap)); 3143 PetscCall(PetscLayoutSetUp(B->cmap)); 3144 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3145 3146 B->preallocated = PETSC_TRUE; 3147 3148 mbs = B->rmap->n / bs; 3149 nbs = B->cmap->n / bs; 3150 bs2 = bs * bs; 3151 3152 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3153 3154 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3155 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3156 if (nnz) { 3157 for (i = 0; i < mbs; i++) { 3158 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3159 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3160 } 3161 } 3162 3163 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3164 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3165 PetscOptionsEnd(); 3166 3167 if (!flg) { 3168 switch (bs) { 3169 case 1: 3170 B->ops->mult = MatMult_SeqBAIJ_1; 3171 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3172 break; 3173 case 2: 3174 B->ops->mult = MatMult_SeqBAIJ_2; 3175 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3176 break; 3177 case 3: 3178 B->ops->mult = MatMult_SeqBAIJ_3; 3179 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3180 break; 3181 case 4: 3182 B->ops->mult = MatMult_SeqBAIJ_4; 3183 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3184 break; 3185 case 5: 3186 B->ops->mult = MatMult_SeqBAIJ_5; 3187 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3188 break; 3189 case 6: 3190 B->ops->mult = MatMult_SeqBAIJ_6; 3191 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3192 break; 3193 case 7: 3194 B->ops->mult = MatMult_SeqBAIJ_7; 3195 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3196 break; 3197 case 9: { 3198 PetscInt version = 1; 3199 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3200 switch (version) { 3201 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3202 case 1: 3203 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3204 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3205 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3206 break; 3207 #endif 3208 default: 3209 B->ops->mult = MatMult_SeqBAIJ_N; 3210 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3211 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3212 break; 3213 } 3214 break; 3215 } 3216 case 11: 3217 B->ops->mult = MatMult_SeqBAIJ_11; 3218 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3219 break; 3220 case 12: { 3221 PetscInt version = 1; 3222 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3223 switch (version) { 3224 case 1: 3225 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3226 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3227 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3228 break; 3229 case 2: 3230 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3231 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3232 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3233 break; 3234 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3235 case 3: 3236 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3237 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3238 PetscCall(PetscInfo(B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3239 break; 3240 #endif 3241 default: 3242 B->ops->mult = MatMult_SeqBAIJ_N; 3243 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3244 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3245 break; 3246 } 3247 break; 3248 } 3249 case 15: { 3250 PetscInt version = 1; 3251 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3252 switch (version) { 3253 case 1: 3254 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3255 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3256 break; 3257 case 2: 3258 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3259 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3260 break; 3261 case 3: 3262 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3263 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3264 break; 3265 case 4: 3266 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3267 PetscCall(PetscInfo(B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3268 break; 3269 default: 3270 B->ops->mult = MatMult_SeqBAIJ_N; 3271 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3272 break; 3273 } 3274 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3275 break; 3276 } 3277 default: 3278 B->ops->mult = MatMult_SeqBAIJ_N; 3279 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3280 PetscCall(PetscInfo(B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3281 break; 3282 } 3283 } 3284 B->ops->sor = MatSOR_SeqBAIJ; 3285 b->mbs = mbs; 3286 b->nbs = nbs; 3287 if (!skipallocation) { 3288 if (!b->imax) { 3289 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3290 3291 b->free_imax_ilen = PETSC_TRUE; 3292 } 3293 /* b->ilen will count nonzeros in each block row so far. */ 3294 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3295 if (!nnz) { 3296 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3297 else if (nz < 0) nz = 1; 3298 nz = PetscMin(nz, nbs); 3299 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3300 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3301 } else { 3302 PetscInt64 nz64 = 0; 3303 for (i = 0; i < mbs; i++) { 3304 b->imax[i] = nnz[i]; 3305 nz64 += nnz[i]; 3306 } 3307 PetscCall(PetscIntCast(nz64, &nz)); 3308 } 3309 3310 /* allocate the matrix space */ 3311 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3312 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&b->j)); 3313 PetscCall(PetscShmgetAllocateArray(B->rmap->N + 1, sizeof(PetscInt), (void **)&b->i)); 3314 if (B->structure_only) { 3315 b->free_a = PETSC_FALSE; 3316 } else { 3317 PetscInt nzbs2 = 0; 3318 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3319 PetscCall(PetscShmgetAllocateArray(nzbs2, sizeof(PetscScalar), (void **)&b->a)); 3320 b->free_a = PETSC_TRUE; 3321 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3322 } 3323 b->free_ij = PETSC_TRUE; 3324 PetscCall(PetscArrayzero(b->j, nz)); 3325 3326 b->i[0] = 0; 3327 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3328 } else { 3329 b->free_a = PETSC_FALSE; 3330 b->free_ij = PETSC_FALSE; 3331 } 3332 3333 b->bs2 = bs2; 3334 b->mbs = mbs; 3335 b->nz = 0; 3336 b->maxnz = nz; 3337 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3338 B->was_assembled = PETSC_FALSE; 3339 B->assembled = PETSC_FALSE; 3340 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3341 PetscFunctionReturn(PETSC_SUCCESS); 3342 } 3343 3344 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3345 { 3346 PetscInt i, m, nz, nz_max = 0, *nnz; 3347 PetscScalar *values = NULL; 3348 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3349 3350 PetscFunctionBegin; 3351 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3352 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3353 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3354 PetscCall(PetscLayoutSetUp(B->rmap)); 3355 PetscCall(PetscLayoutSetUp(B->cmap)); 3356 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3357 m = B->rmap->n / bs; 3358 3359 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3360 PetscCall(PetscMalloc1(m + 1, &nnz)); 3361 for (i = 0; i < m; i++) { 3362 nz = ii[i + 1] - ii[i]; 3363 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3364 nz_max = PetscMax(nz_max, nz); 3365 nnz[i] = nz; 3366 } 3367 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3368 PetscCall(PetscFree(nnz)); 3369 3370 values = (PetscScalar *)V; 3371 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3372 for (i = 0; i < m; i++) { 3373 PetscInt ncols = ii[i + 1] - ii[i]; 3374 const PetscInt *icols = jj + ii[i]; 3375 if (bs == 1 || !roworiented) { 3376 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3377 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3378 } else { 3379 PetscInt j; 3380 for (j = 0; j < ncols; j++) { 3381 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3382 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3383 } 3384 } 3385 } 3386 if (!V) PetscCall(PetscFree(values)); 3387 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3388 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3389 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3390 PetscFunctionReturn(PETSC_SUCCESS); 3391 } 3392 3393 /*@C 3394 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3395 3396 Not Collective 3397 3398 Input Parameter: 3399 . A - a `MATSEQBAIJ` matrix 3400 3401 Output Parameter: 3402 . array - pointer to the data 3403 3404 Level: intermediate 3405 3406 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3407 @*/ 3408 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar *array[]) 3409 { 3410 PetscFunctionBegin; 3411 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } 3414 3415 /*@C 3416 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3417 3418 Not Collective 3419 3420 Input Parameters: 3421 + A - a `MATSEQBAIJ` matrix 3422 - array - pointer to the data 3423 3424 Level: intermediate 3425 3426 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3427 @*/ 3428 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar *array[]) 3429 { 3430 PetscFunctionBegin; 3431 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3432 PetscFunctionReturn(PETSC_SUCCESS); 3433 } 3434 3435 /*MC 3436 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3437 block sparse compressed row format. 3438 3439 Options Database Keys: 3440 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3441 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3442 3443 Level: beginner 3444 3445 Notes: 3446 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3447 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3448 3449 Run with `-info` to see what version of the matrix-vector product is being used 3450 3451 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3452 M*/ 3453 3454 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3455 3456 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3457 { 3458 PetscMPIInt size; 3459 Mat_SeqBAIJ *b; 3460 3461 PetscFunctionBegin; 3462 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3463 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3464 3465 PetscCall(PetscNew(&b)); 3466 B->data = (void *)b; 3467 B->ops[0] = MatOps_Values; 3468 3469 b->row = NULL; 3470 b->col = NULL; 3471 b->icol = NULL; 3472 b->reallocs = 0; 3473 b->saved_values = NULL; 3474 3475 b->roworiented = PETSC_TRUE; 3476 b->nonew = 0; 3477 b->diag = NULL; 3478 B->spptr = NULL; 3479 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3480 b->keepnonzeropattern = PETSC_FALSE; 3481 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3489 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3491 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3492 #if defined(PETSC_HAVE_HYPRE) 3493 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3494 #endif 3495 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3496 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3497 PetscFunctionReturn(PETSC_SUCCESS); 3498 } 3499 3500 PETSC_INTERN PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3501 { 3502 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3503 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3504 3505 PetscFunctionBegin; 3506 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3507 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3508 3509 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3510 c->imax = a->imax; 3511 c->ilen = a->ilen; 3512 c->free_imax_ilen = PETSC_FALSE; 3513 } else { 3514 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3515 for (i = 0; i < mbs; i++) { 3516 c->imax[i] = a->imax[i]; 3517 c->ilen[i] = a->ilen[i]; 3518 } 3519 c->free_imax_ilen = PETSC_TRUE; 3520 } 3521 3522 /* allocate the matrix space */ 3523 if (mallocmatspace) { 3524 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3525 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3526 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3527 c->free_a = PETSC_TRUE; 3528 c->i = a->i; 3529 c->j = a->j; 3530 c->free_ij = PETSC_FALSE; 3531 c->parent = A; 3532 C->preallocated = PETSC_TRUE; 3533 C->assembled = PETSC_TRUE; 3534 3535 PetscCall(PetscObjectReference((PetscObject)A)); 3536 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3537 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3538 } else { 3539 PetscCall(PetscShmgetAllocateArray(bs2 * nz, sizeof(PetscScalar), (void **)&c->a)); 3540 PetscCall(PetscShmgetAllocateArray(nz, sizeof(PetscInt), (void **)&c->j)); 3541 PetscCall(PetscShmgetAllocateArray(mbs + 1, sizeof(PetscInt), (void **)&c->i)); 3542 c->free_a = PETSC_TRUE; 3543 c->free_ij = PETSC_TRUE; 3544 3545 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3546 if (mbs > 0) { 3547 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3548 if (cpvalues == MAT_COPY_VALUES) { 3549 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3550 } else { 3551 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3552 } 3553 } 3554 C->preallocated = PETSC_TRUE; 3555 C->assembled = PETSC_TRUE; 3556 } 3557 } 3558 3559 c->roworiented = a->roworiented; 3560 c->nonew = a->nonew; 3561 3562 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3563 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3564 3565 c->bs2 = a->bs2; 3566 c->mbs = a->mbs; 3567 c->nbs = a->nbs; 3568 c->nz = a->nz; 3569 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3570 c->solve_work = NULL; 3571 c->mult_work = NULL; 3572 c->sor_workt = NULL; 3573 c->sor_work = NULL; 3574 3575 c->compressedrow.use = a->compressedrow.use; 3576 c->compressedrow.nrows = a->compressedrow.nrows; 3577 if (a->compressedrow.use) { 3578 i = a->compressedrow.nrows; 3579 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3580 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3581 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3582 } else { 3583 c->compressedrow.use = PETSC_FALSE; 3584 c->compressedrow.i = NULL; 3585 c->compressedrow.rindex = NULL; 3586 } 3587 c->nonzerorowcnt = a->nonzerorowcnt; 3588 C->nonzerostate = A->nonzerostate; 3589 3590 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3591 PetscFunctionReturn(PETSC_SUCCESS); 3592 } 3593 3594 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3595 { 3596 PetscFunctionBegin; 3597 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3598 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3599 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3600 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3601 PetscFunctionReturn(PETSC_SUCCESS); 3602 } 3603 3604 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3605 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3606 { 3607 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3608 PetscInt *rowidxs, *colidxs; 3609 PetscScalar *matvals; 3610 3611 PetscFunctionBegin; 3612 PetscCall(PetscViewerSetUp(viewer)); 3613 3614 /* read matrix header */ 3615 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3616 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3617 M = header[1]; 3618 N = header[2]; 3619 nz = header[3]; 3620 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3621 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3622 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3623 3624 /* set block sizes from the viewer's .info file */ 3625 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3626 /* set local and global sizes if not set already */ 3627 if (mat->rmap->n < 0) mat->rmap->n = M; 3628 if (mat->cmap->n < 0) mat->cmap->n = N; 3629 if (mat->rmap->N < 0) mat->rmap->N = M; 3630 if (mat->cmap->N < 0) mat->cmap->N = N; 3631 PetscCall(PetscLayoutSetUp(mat->rmap)); 3632 PetscCall(PetscLayoutSetUp(mat->cmap)); 3633 3634 /* check if the matrix sizes are correct */ 3635 PetscCall(MatGetSize(mat, &rows, &cols)); 3636 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3637 PetscCall(MatGetBlockSize(mat, &bs)); 3638 PetscCall(MatGetLocalSize(mat, &m, &n)); 3639 mbs = m / bs; 3640 nbs = n / bs; 3641 3642 /* read in row lengths, column indices and nonzero values */ 3643 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3644 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3645 rowidxs[0] = 0; 3646 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3647 sum = rowidxs[m]; 3648 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3649 3650 /* read in column indices and nonzero values */ 3651 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3652 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3653 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3654 3655 { /* preallocate matrix storage */ 3656 PetscBT bt; /* helper bit set to count nonzeros */ 3657 PetscInt *nnz; 3658 PetscBool sbaij; 3659 3660 PetscCall(PetscBTCreate(nbs, &bt)); 3661 PetscCall(PetscCalloc1(mbs, &nnz)); 3662 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3663 for (i = 0; i < mbs; i++) { 3664 PetscCall(PetscBTMemzero(nbs, bt)); 3665 for (k = 0; k < bs; k++) { 3666 PetscInt row = bs * i + k; 3667 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3668 PetscInt col = colidxs[j]; 3669 if (!sbaij || col >= row) 3670 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3671 } 3672 } 3673 } 3674 PetscCall(PetscBTDestroy(&bt)); 3675 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3676 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3677 PetscCall(PetscFree(nnz)); 3678 } 3679 3680 /* store matrix values */ 3681 for (i = 0; i < m; i++) { 3682 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3683 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3684 } 3685 3686 PetscCall(PetscFree(rowidxs)); 3687 PetscCall(PetscFree2(colidxs, matvals)); 3688 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3689 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3690 PetscFunctionReturn(PETSC_SUCCESS); 3691 } 3692 3693 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3694 { 3695 PetscBool isbinary; 3696 3697 PetscFunctionBegin; 3698 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3699 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3700 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3701 PetscFunctionReturn(PETSC_SUCCESS); 3702 } 3703 3704 /*@ 3705 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3706 compressed row) format. For good matrix assembly performance the 3707 user should preallocate the matrix storage by setting the parameter `nz` 3708 (or the array `nnz`). 3709 3710 Collective 3711 3712 Input Parameters: 3713 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3714 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3715 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3716 . m - number of rows 3717 . n - number of columns 3718 . nz - number of nonzero blocks per block row (same for all rows) 3719 - nnz - array containing the number of nonzero blocks in the various block rows 3720 (possibly different for each block row) or `NULL` 3721 3722 Output Parameter: 3723 . A - the matrix 3724 3725 Options Database Keys: 3726 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3727 - -mat_block_size - size of the blocks to use 3728 3729 Level: intermediate 3730 3731 Notes: 3732 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3733 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3734 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3735 3736 The number of rows and columns must be divisible by blocksize. 3737 3738 If the `nnz` parameter is given then the `nz` parameter is ignored 3739 3740 A nonzero block is any block that as 1 or more nonzeros in it 3741 3742 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3743 storage. That is, the stored row and column indices can begin at 3744 either one (as in Fortran) or zero. 3745 3746 Specify the preallocated storage with either `nz` or `nnz` (not both). 3747 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3748 allocation. See [Sparse Matrices](sec_matsparse) for details. 3749 matrices. 3750 3751 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3752 @*/ 3753 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3754 { 3755 PetscFunctionBegin; 3756 PetscCall(MatCreate(comm, A)); 3757 PetscCall(MatSetSizes(*A, m, n, m, n)); 3758 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3759 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3760 PetscFunctionReturn(PETSC_SUCCESS); 3761 } 3762 3763 /*@ 3764 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3765 per row in the matrix. For good matrix assembly performance the 3766 user should preallocate the matrix storage by setting the parameter `nz` 3767 (or the array `nnz`). 3768 3769 Collective 3770 3771 Input Parameters: 3772 + B - the matrix 3773 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3774 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3775 . nz - number of block nonzeros per block row (same for all rows) 3776 - nnz - array containing the number of block nonzeros in the various block rows 3777 (possibly different for each block row) or `NULL` 3778 3779 Options Database Keys: 3780 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3781 - -mat_block_size - size of the blocks to use 3782 3783 Level: intermediate 3784 3785 Notes: 3786 If the `nnz` parameter is given then the `nz` parameter is ignored 3787 3788 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3789 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3790 You can also run with the option `-info` and look for messages with the string 3791 malloc in them to see if additional memory allocation was needed. 3792 3793 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3794 storage. That is, the stored row and column indices can begin at 3795 either one (as in Fortran) or zero. 3796 3797 Specify the preallocated storage with either `nz` or `nnz` (not both). 3798 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3799 allocation. See [Sparse Matrices](sec_matsparse) for details. 3800 3801 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3802 @*/ 3803 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3804 { 3805 PetscFunctionBegin; 3806 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3807 PetscValidType(B, 1); 3808 PetscValidLogicalCollectiveInt(B, bs, 2); 3809 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3810 PetscFunctionReturn(PETSC_SUCCESS); 3811 } 3812 3813 /*@C 3814 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3815 3816 Collective 3817 3818 Input Parameters: 3819 + B - the matrix 3820 . bs - the blocksize 3821 . i - the indices into `j` for the start of each local row (indices start with zero) 3822 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 3823 - v - optional values in the matrix, use `NULL` if not provided 3824 3825 Level: advanced 3826 3827 Notes: 3828 The `i`,`j`,`v` values are COPIED with this routine; to avoid the copy use `MatCreateSeqBAIJWithArrays()` 3829 3830 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3831 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3832 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3833 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3834 block column and the second index is over columns within a block. 3835 3836 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3837 3838 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3839 @*/ 3840 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3841 { 3842 PetscFunctionBegin; 3843 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3844 PetscValidType(B, 1); 3845 PetscValidLogicalCollectiveInt(B, bs, 2); 3846 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3847 PetscFunctionReturn(PETSC_SUCCESS); 3848 } 3849 3850 /*@ 3851 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3852 3853 Collective 3854 3855 Input Parameters: 3856 + comm - must be an MPI communicator of size 1 3857 . bs - size of block 3858 . m - number of rows 3859 . n - number of columns 3860 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3861 . j - column indices 3862 - a - matrix values 3863 3864 Output Parameter: 3865 . mat - the matrix 3866 3867 Level: advanced 3868 3869 Notes: 3870 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3871 once the matrix is destroyed 3872 3873 You cannot set new nonzero locations into this matrix, that will generate an error. 3874 3875 The `i` and `j` indices are 0 based 3876 3877 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3878 3879 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3880 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3881 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3882 with column-major ordering within blocks. 3883 3884 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3885 @*/ 3886 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3887 { 3888 Mat_SeqBAIJ *baij; 3889 3890 PetscFunctionBegin; 3891 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3892 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3893 3894 PetscCall(MatCreate(comm, mat)); 3895 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3896 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3897 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3898 baij = (Mat_SeqBAIJ *)(*mat)->data; 3899 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3900 3901 baij->i = i; 3902 baij->j = j; 3903 baij->a = a; 3904 3905 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3906 baij->free_a = PETSC_FALSE; 3907 baij->free_ij = PETSC_FALSE; 3908 baij->free_imax_ilen = PETSC_TRUE; 3909 3910 for (PetscInt ii = 0; ii < m; ii++) { 3911 const PetscInt row_len = i[ii + 1] - i[ii]; 3912 3913 baij->ilen[ii] = baij->imax[ii] = row_len; 3914 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3915 } 3916 if (PetscDefined(USE_DEBUG)) { 3917 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3918 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3919 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3920 } 3921 } 3922 3923 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3924 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3925 PetscFunctionReturn(PETSC_SUCCESS); 3926 } 3927 3928 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3929 { 3930 PetscFunctionBegin; 3931 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3932 PetscFunctionReturn(PETSC_SUCCESS); 3933 } 3934