1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 13 #endif 14 15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 17 #endif 18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 19 20 PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 21 { 22 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 23 PetscInt m, n, i; 24 PetscInt ib, jb, bs = A->rmap->bs; 25 MatScalar *a_val = a_aij->a; 26 27 PetscFunctionBegin; 28 PetscCall(MatGetSize(A, &m, &n)); 29 for (i = 0; i < n; i++) reductions[i] = 0.0; 30 if (type == NORM_2) { 31 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 32 for (jb = 0; jb < bs; jb++) { 33 for (ib = 0; ib < bs; ib++) { 34 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 35 a_val++; 36 } 37 } 38 } 39 } else if (type == NORM_1) { 40 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 41 for (jb = 0; jb < bs; jb++) { 42 for (ib = 0; ib < bs; ib++) { 43 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 44 a_val++; 45 } 46 } 47 } 48 } else if (type == NORM_INFINITY) { 49 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 50 for (jb = 0; jb < bs; jb++) { 51 for (ib = 0; ib < bs; ib++) { 52 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 53 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 54 a_val++; 55 } 56 } 57 } 58 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 59 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 60 for (jb = 0; jb < bs; jb++) { 61 for (ib = 0; ib < bs; ib++) { 62 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 63 a_val++; 64 } 65 } 66 } 67 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 68 for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 69 for (jb = 0; jb < bs; jb++) { 70 for (ib = 0; ib < bs; ib++) { 71 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 72 a_val++; 73 } 74 } 75 } 76 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 77 if (type == NORM_2) { 78 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 79 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (i = 0; i < n; i++) reductions[i] /= m; 81 } 82 PetscFunctionReturn(0); 83 } 84 85 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 86 { 87 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 88 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 89 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 90 PetscReal shift = 0.0; 91 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 92 93 PetscFunctionBegin; 94 allowzeropivot = PetscNot(A->erroriffailure); 95 96 if (a->idiagvalid) { 97 if (values) *values = a->idiag; 98 PetscFunctionReturn(0); 99 } 100 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 101 diag_offset = a->diag; 102 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 103 diag = a->idiag; 104 if (values) *values = a->idiag; 105 /* factor and invert each block */ 106 switch (bs) { 107 case 1: 108 for (i = 0; i < mbs; i++) { 109 odiag = v + 1 * diag_offset[i]; 110 diag[0] = odiag[0]; 111 112 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 113 if (allowzeropivot) { 114 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 115 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 116 A->factorerror_zeropivot_row = i; 117 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 118 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 119 } 120 121 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 122 diag += 1; 123 } 124 break; 125 case 2: 126 for (i = 0; i < mbs; i++) { 127 odiag = v + 4 * diag_offset[i]; 128 diag[0] = odiag[0]; 129 diag[1] = odiag[1]; 130 diag[2] = odiag[2]; 131 diag[3] = odiag[3]; 132 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 133 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 134 diag += 4; 135 } 136 break; 137 case 3: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 9 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 diag[4] = odiag[4]; 145 diag[5] = odiag[5]; 146 diag[6] = odiag[6]; 147 diag[7] = odiag[7]; 148 diag[8] = odiag[8]; 149 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 150 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 151 diag += 9; 152 } 153 break; 154 case 4: 155 for (i = 0; i < mbs; i++) { 156 odiag = v + 16 * diag_offset[i]; 157 PetscCall(PetscArraycpy(diag, odiag, 16)); 158 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 159 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 160 diag += 16; 161 } 162 break; 163 case 5: 164 for (i = 0; i < mbs; i++) { 165 odiag = v + 25 * diag_offset[i]; 166 PetscCall(PetscArraycpy(diag, odiag, 25)); 167 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 168 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 169 diag += 25; 170 } 171 break; 172 case 6: 173 for (i = 0; i < mbs; i++) { 174 odiag = v + 36 * diag_offset[i]; 175 PetscCall(PetscArraycpy(diag, odiag, 36)); 176 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 177 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 178 diag += 36; 179 } 180 break; 181 case 7: 182 for (i = 0; i < mbs; i++) { 183 odiag = v + 49 * diag_offset[i]; 184 PetscCall(PetscArraycpy(diag, odiag, 49)); 185 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 186 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 187 diag += 49; 188 } 189 break; 190 default: 191 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 192 for (i = 0; i < mbs; i++) { 193 odiag = v + bs2 * diag_offset[i]; 194 PetscCall(PetscArraycpy(diag, odiag, bs2)); 195 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 196 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 197 diag += bs2; 198 } 199 PetscCall(PetscFree2(v_work, v_pivots)); 200 } 201 a->idiagvalid = PETSC_TRUE; 202 PetscFunctionReturn(0); 203 } 204 205 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 206 { 207 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 208 PetscScalar *x, *work, *w, *workt, *t; 209 const MatScalar *v, *aa = a->a, *idiag; 210 const PetscScalar *b, *xb; 211 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 212 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 213 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 214 215 PetscFunctionBegin; 216 its = its * lits; 217 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 218 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 219 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 220 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 221 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 222 223 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 224 225 if (!m) PetscFunctionReturn(0); 226 diag = a->diag; 227 idiag = a->idiag; 228 k = PetscMax(A->rmap->n, A->cmap->n); 229 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 230 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 231 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 232 work = a->mult_work; 233 t = a->sor_workt; 234 w = a->sor_work; 235 236 PetscCall(VecGetArray(xx, &x)); 237 PetscCall(VecGetArrayRead(bb, &b)); 238 239 if (flag & SOR_ZERO_INITIAL_GUESS) { 240 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 241 switch (bs) { 242 case 1: 243 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 244 t[0] = b[0]; 245 i2 = 1; 246 idiag += 1; 247 for (i = 1; i < m; i++) { 248 v = aa + ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; 252 for (j = 0; j < nz; j++) { 253 xw[0] = x[vi[j]]; 254 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 255 } 256 t[i2] = s[0]; 257 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 258 x[i2] = xw[0]; 259 idiag += 1; 260 i2 += 1; 261 } 262 break; 263 case 2: 264 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 265 t[0] = b[0]; 266 t[1] = b[1]; 267 i2 = 2; 268 idiag += 4; 269 for (i = 1; i < m; i++) { 270 v = aa + 4 * ai[i]; 271 vi = aj + ai[i]; 272 nz = diag[i] - ai[i]; 273 s[0] = b[i2]; 274 s[1] = b[i2 + 1]; 275 for (j = 0; j < nz; j++) { 276 idx = 2 * vi[j]; 277 it = 4 * j; 278 xw[0] = x[idx]; 279 xw[1] = x[1 + idx]; 280 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 281 } 282 t[i2] = s[0]; 283 t[i2 + 1] = s[1]; 284 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 285 x[i2] = xw[0]; 286 x[i2 + 1] = xw[1]; 287 idiag += 4; 288 i2 += 2; 289 } 290 break; 291 case 3: 292 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 293 t[0] = b[0]; 294 t[1] = b[1]; 295 t[2] = b[2]; 296 i2 = 3; 297 idiag += 9; 298 for (i = 1; i < m; i++) { 299 v = aa + 9 * ai[i]; 300 vi = aj + ai[i]; 301 nz = diag[i] - ai[i]; 302 s[0] = b[i2]; 303 s[1] = b[i2 + 1]; 304 s[2] = b[i2 + 2]; 305 while (nz--) { 306 idx = 3 * (*vi++); 307 xw[0] = x[idx]; 308 xw[1] = x[1 + idx]; 309 xw[2] = x[2 + idx]; 310 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 311 v += 9; 312 } 313 t[i2] = s[0]; 314 t[i2 + 1] = s[1]; 315 t[i2 + 2] = s[2]; 316 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 317 x[i2] = xw[0]; 318 x[i2 + 1] = xw[1]; 319 x[i2 + 2] = xw[2]; 320 idiag += 9; 321 i2 += 3; 322 } 323 break; 324 case 4: 325 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 326 t[0] = b[0]; 327 t[1] = b[1]; 328 t[2] = b[2]; 329 t[3] = b[3]; 330 i2 = 4; 331 idiag += 16; 332 for (i = 1; i < m; i++) { 333 v = aa + 16 * ai[i]; 334 vi = aj + ai[i]; 335 nz = diag[i] - ai[i]; 336 s[0] = b[i2]; 337 s[1] = b[i2 + 1]; 338 s[2] = b[i2 + 2]; 339 s[3] = b[i2 + 3]; 340 while (nz--) { 341 idx = 4 * (*vi++); 342 xw[0] = x[idx]; 343 xw[1] = x[1 + idx]; 344 xw[2] = x[2 + idx]; 345 xw[3] = x[3 + idx]; 346 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 347 v += 16; 348 } 349 t[i2] = s[0]; 350 t[i2 + 1] = s[1]; 351 t[i2 + 2] = s[2]; 352 t[i2 + 3] = s[3]; 353 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 354 x[i2] = xw[0]; 355 x[i2 + 1] = xw[1]; 356 x[i2 + 2] = xw[2]; 357 x[i2 + 3] = xw[3]; 358 idiag += 16; 359 i2 += 4; 360 } 361 break; 362 case 5: 363 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 364 t[0] = b[0]; 365 t[1] = b[1]; 366 t[2] = b[2]; 367 t[3] = b[3]; 368 t[4] = b[4]; 369 i2 = 5; 370 idiag += 25; 371 for (i = 1; i < m; i++) { 372 v = aa + 25 * ai[i]; 373 vi = aj + ai[i]; 374 nz = diag[i] - ai[i]; 375 s[0] = b[i2]; 376 s[1] = b[i2 + 1]; 377 s[2] = b[i2 + 2]; 378 s[3] = b[i2 + 3]; 379 s[4] = b[i2 + 4]; 380 while (nz--) { 381 idx = 5 * (*vi++); 382 xw[0] = x[idx]; 383 xw[1] = x[1 + idx]; 384 xw[2] = x[2 + idx]; 385 xw[3] = x[3 + idx]; 386 xw[4] = x[4 + idx]; 387 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 388 v += 25; 389 } 390 t[i2] = s[0]; 391 t[i2 + 1] = s[1]; 392 t[i2 + 2] = s[2]; 393 t[i2 + 3] = s[3]; 394 t[i2 + 4] = s[4]; 395 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 396 x[i2] = xw[0]; 397 x[i2 + 1] = xw[1]; 398 x[i2 + 2] = xw[2]; 399 x[i2 + 3] = xw[3]; 400 x[i2 + 4] = xw[4]; 401 idiag += 25; 402 i2 += 5; 403 } 404 break; 405 case 6: 406 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 407 t[0] = b[0]; 408 t[1] = b[1]; 409 t[2] = b[2]; 410 t[3] = b[3]; 411 t[4] = b[4]; 412 t[5] = b[5]; 413 i2 = 6; 414 idiag += 36; 415 for (i = 1; i < m; i++) { 416 v = aa + 36 * ai[i]; 417 vi = aj + ai[i]; 418 nz = diag[i] - ai[i]; 419 s[0] = b[i2]; 420 s[1] = b[i2 + 1]; 421 s[2] = b[i2 + 2]; 422 s[3] = b[i2 + 3]; 423 s[4] = b[i2 + 4]; 424 s[5] = b[i2 + 5]; 425 while (nz--) { 426 idx = 6 * (*vi++); 427 xw[0] = x[idx]; 428 xw[1] = x[1 + idx]; 429 xw[2] = x[2 + idx]; 430 xw[3] = x[3 + idx]; 431 xw[4] = x[4 + idx]; 432 xw[5] = x[5 + idx]; 433 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 434 v += 36; 435 } 436 t[i2] = s[0]; 437 t[i2 + 1] = s[1]; 438 t[i2 + 2] = s[2]; 439 t[i2 + 3] = s[3]; 440 t[i2 + 4] = s[4]; 441 t[i2 + 5] = s[5]; 442 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 443 x[i2] = xw[0]; 444 x[i2 + 1] = xw[1]; 445 x[i2 + 2] = xw[2]; 446 x[i2 + 3] = xw[3]; 447 x[i2 + 4] = xw[4]; 448 x[i2 + 5] = xw[5]; 449 idiag += 36; 450 i2 += 6; 451 } 452 break; 453 case 7: 454 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 455 t[0] = b[0]; 456 t[1] = b[1]; 457 t[2] = b[2]; 458 t[3] = b[3]; 459 t[4] = b[4]; 460 t[5] = b[5]; 461 t[6] = b[6]; 462 i2 = 7; 463 idiag += 49; 464 for (i = 1; i < m; i++) { 465 v = aa + 49 * ai[i]; 466 vi = aj + ai[i]; 467 nz = diag[i] - ai[i]; 468 s[0] = b[i2]; 469 s[1] = b[i2 + 1]; 470 s[2] = b[i2 + 2]; 471 s[3] = b[i2 + 3]; 472 s[4] = b[i2 + 4]; 473 s[5] = b[i2 + 5]; 474 s[6] = b[i2 + 6]; 475 while (nz--) { 476 idx = 7 * (*vi++); 477 xw[0] = x[idx]; 478 xw[1] = x[1 + idx]; 479 xw[2] = x[2 + idx]; 480 xw[3] = x[3 + idx]; 481 xw[4] = x[4 + idx]; 482 xw[5] = x[5 + idx]; 483 xw[6] = x[6 + idx]; 484 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 485 v += 49; 486 } 487 t[i2] = s[0]; 488 t[i2 + 1] = s[1]; 489 t[i2 + 2] = s[2]; 490 t[i2 + 3] = s[3]; 491 t[i2 + 4] = s[4]; 492 t[i2 + 5] = s[5]; 493 t[i2 + 6] = s[6]; 494 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 495 x[i2] = xw[0]; 496 x[i2 + 1] = xw[1]; 497 x[i2 + 2] = xw[2]; 498 x[i2 + 3] = xw[3]; 499 x[i2 + 4] = xw[4]; 500 x[i2 + 5] = xw[5]; 501 x[i2 + 6] = xw[6]; 502 idiag += 49; 503 i2 += 7; 504 } 505 break; 506 default: 507 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 508 PetscCall(PetscArraycpy(t, b, bs)); 509 i2 = bs; 510 idiag += bs2; 511 for (i = 1; i < m; i++) { 512 v = aa + bs2 * ai[i]; 513 vi = aj + ai[i]; 514 nz = diag[i] - ai[i]; 515 516 PetscCall(PetscArraycpy(w, b + i2, bs)); 517 /* copy all rows of x that are needed into contiguous space */ 518 workt = work; 519 for (j = 0; j < nz; j++) { 520 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 521 workt += bs; 522 } 523 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 524 PetscCall(PetscArraycpy(t + i2, w, bs)); 525 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 526 527 idiag += bs2; 528 i2 += bs; 529 } 530 break; 531 } 532 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 533 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 534 xb = t; 535 } else xb = b; 536 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 537 idiag = a->idiag + bs2 * (a->mbs - 1); 538 i2 = bs * (m - 1); 539 switch (bs) { 540 case 1: 541 s[0] = xb[i2]; 542 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 543 x[i2] = xw[0]; 544 i2 -= 1; 545 for (i = m - 2; i >= 0; i--) { 546 v = aa + (diag[i] + 1); 547 vi = aj + diag[i] + 1; 548 nz = ai[i + 1] - diag[i] - 1; 549 s[0] = xb[i2]; 550 for (j = 0; j < nz; j++) { 551 xw[0] = x[vi[j]]; 552 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 553 } 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 idiag -= 1; 557 i2 -= 1; 558 } 559 break; 560 case 2: 561 s[0] = xb[i2]; 562 s[1] = xb[i2 + 1]; 563 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 564 x[i2] = xw[0]; 565 x[i2 + 1] = xw[1]; 566 i2 -= 2; 567 idiag -= 4; 568 for (i = m - 2; i >= 0; i--) { 569 v = aa + 4 * (diag[i] + 1); 570 vi = aj + diag[i] + 1; 571 nz = ai[i + 1] - diag[i] - 1; 572 s[0] = xb[i2]; 573 s[1] = xb[i2 + 1]; 574 for (j = 0; j < nz; j++) { 575 idx = 2 * vi[j]; 576 it = 4 * j; 577 xw[0] = x[idx]; 578 xw[1] = x[1 + idx]; 579 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 580 } 581 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 582 x[i2] = xw[0]; 583 x[i2 + 1] = xw[1]; 584 idiag -= 4; 585 i2 -= 2; 586 } 587 break; 588 case 3: 589 s[0] = xb[i2]; 590 s[1] = xb[i2 + 1]; 591 s[2] = xb[i2 + 2]; 592 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 593 x[i2] = xw[0]; 594 x[i2 + 1] = xw[1]; 595 x[i2 + 2] = xw[2]; 596 i2 -= 3; 597 idiag -= 9; 598 for (i = m - 2; i >= 0; i--) { 599 v = aa + 9 * (diag[i] + 1); 600 vi = aj + diag[i] + 1; 601 nz = ai[i + 1] - diag[i] - 1; 602 s[0] = xb[i2]; 603 s[1] = xb[i2 + 1]; 604 s[2] = xb[i2 + 2]; 605 while (nz--) { 606 idx = 3 * (*vi++); 607 xw[0] = x[idx]; 608 xw[1] = x[1 + idx]; 609 xw[2] = x[2 + idx]; 610 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 611 v += 9; 612 } 613 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 614 x[i2] = xw[0]; 615 x[i2 + 1] = xw[1]; 616 x[i2 + 2] = xw[2]; 617 idiag -= 9; 618 i2 -= 3; 619 } 620 break; 621 case 4: 622 s[0] = xb[i2]; 623 s[1] = xb[i2 + 1]; 624 s[2] = xb[i2 + 2]; 625 s[3] = xb[i2 + 3]; 626 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 627 x[i2] = xw[0]; 628 x[i2 + 1] = xw[1]; 629 x[i2 + 2] = xw[2]; 630 x[i2 + 3] = xw[3]; 631 i2 -= 4; 632 idiag -= 16; 633 for (i = m - 2; i >= 0; i--) { 634 v = aa + 16 * (diag[i] + 1); 635 vi = aj + diag[i] + 1; 636 nz = ai[i + 1] - diag[i] - 1; 637 s[0] = xb[i2]; 638 s[1] = xb[i2 + 1]; 639 s[2] = xb[i2 + 2]; 640 s[3] = xb[i2 + 3]; 641 while (nz--) { 642 idx = 4 * (*vi++); 643 xw[0] = x[idx]; 644 xw[1] = x[1 + idx]; 645 xw[2] = x[2 + idx]; 646 xw[3] = x[3 + idx]; 647 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 648 v += 16; 649 } 650 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 651 x[i2] = xw[0]; 652 x[i2 + 1] = xw[1]; 653 x[i2 + 2] = xw[2]; 654 x[i2 + 3] = xw[3]; 655 idiag -= 16; 656 i2 -= 4; 657 } 658 break; 659 case 5: 660 s[0] = xb[i2]; 661 s[1] = xb[i2 + 1]; 662 s[2] = xb[i2 + 2]; 663 s[3] = xb[i2 + 3]; 664 s[4] = xb[i2 + 4]; 665 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 666 x[i2] = xw[0]; 667 x[i2 + 1] = xw[1]; 668 x[i2 + 2] = xw[2]; 669 x[i2 + 3] = xw[3]; 670 x[i2 + 4] = xw[4]; 671 i2 -= 5; 672 idiag -= 25; 673 for (i = m - 2; i >= 0; i--) { 674 v = aa + 25 * (diag[i] + 1); 675 vi = aj + diag[i] + 1; 676 nz = ai[i + 1] - diag[i] - 1; 677 s[0] = xb[i2]; 678 s[1] = xb[i2 + 1]; 679 s[2] = xb[i2 + 2]; 680 s[3] = xb[i2 + 3]; 681 s[4] = xb[i2 + 4]; 682 while (nz--) { 683 idx = 5 * (*vi++); 684 xw[0] = x[idx]; 685 xw[1] = x[1 + idx]; 686 xw[2] = x[2 + idx]; 687 xw[3] = x[3 + idx]; 688 xw[4] = x[4 + idx]; 689 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 690 v += 25; 691 } 692 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 693 x[i2] = xw[0]; 694 x[i2 + 1] = xw[1]; 695 x[i2 + 2] = xw[2]; 696 x[i2 + 3] = xw[3]; 697 x[i2 + 4] = xw[4]; 698 idiag -= 25; 699 i2 -= 5; 700 } 701 break; 702 case 6: 703 s[0] = xb[i2]; 704 s[1] = xb[i2 + 1]; 705 s[2] = xb[i2 + 2]; 706 s[3] = xb[i2 + 3]; 707 s[4] = xb[i2 + 4]; 708 s[5] = xb[i2 + 5]; 709 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 710 x[i2] = xw[0]; 711 x[i2 + 1] = xw[1]; 712 x[i2 + 2] = xw[2]; 713 x[i2 + 3] = xw[3]; 714 x[i2 + 4] = xw[4]; 715 x[i2 + 5] = xw[5]; 716 i2 -= 6; 717 idiag -= 36; 718 for (i = m - 2; i >= 0; i--) { 719 v = aa + 36 * (diag[i] + 1); 720 vi = aj + diag[i] + 1; 721 nz = ai[i + 1] - diag[i] - 1; 722 s[0] = xb[i2]; 723 s[1] = xb[i2 + 1]; 724 s[2] = xb[i2 + 2]; 725 s[3] = xb[i2 + 3]; 726 s[4] = xb[i2 + 4]; 727 s[5] = xb[i2 + 5]; 728 while (nz--) { 729 idx = 6 * (*vi++); 730 xw[0] = x[idx]; 731 xw[1] = x[1 + idx]; 732 xw[2] = x[2 + idx]; 733 xw[3] = x[3 + idx]; 734 xw[4] = x[4 + idx]; 735 xw[5] = x[5 + idx]; 736 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 737 v += 36; 738 } 739 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 740 x[i2] = xw[0]; 741 x[i2 + 1] = xw[1]; 742 x[i2 + 2] = xw[2]; 743 x[i2 + 3] = xw[3]; 744 x[i2 + 4] = xw[4]; 745 x[i2 + 5] = xw[5]; 746 idiag -= 36; 747 i2 -= 6; 748 } 749 break; 750 case 7: 751 s[0] = xb[i2]; 752 s[1] = xb[i2 + 1]; 753 s[2] = xb[i2 + 2]; 754 s[3] = xb[i2 + 3]; 755 s[4] = xb[i2 + 4]; 756 s[5] = xb[i2 + 5]; 757 s[6] = xb[i2 + 6]; 758 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 759 x[i2] = xw[0]; 760 x[i2 + 1] = xw[1]; 761 x[i2 + 2] = xw[2]; 762 x[i2 + 3] = xw[3]; 763 x[i2 + 4] = xw[4]; 764 x[i2 + 5] = xw[5]; 765 x[i2 + 6] = xw[6]; 766 i2 -= 7; 767 idiag -= 49; 768 for (i = m - 2; i >= 0; i--) { 769 v = aa + 49 * (diag[i] + 1); 770 vi = aj + diag[i] + 1; 771 nz = ai[i + 1] - diag[i] - 1; 772 s[0] = xb[i2]; 773 s[1] = xb[i2 + 1]; 774 s[2] = xb[i2 + 2]; 775 s[3] = xb[i2 + 3]; 776 s[4] = xb[i2 + 4]; 777 s[5] = xb[i2 + 5]; 778 s[6] = xb[i2 + 6]; 779 while (nz--) { 780 idx = 7 * (*vi++); 781 xw[0] = x[idx]; 782 xw[1] = x[1 + idx]; 783 xw[2] = x[2 + idx]; 784 xw[3] = x[3 + idx]; 785 xw[4] = x[4 + idx]; 786 xw[5] = x[5 + idx]; 787 xw[6] = x[6 + idx]; 788 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 789 v += 49; 790 } 791 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 792 x[i2] = xw[0]; 793 x[i2 + 1] = xw[1]; 794 x[i2 + 2] = xw[2]; 795 x[i2 + 3] = xw[3]; 796 x[i2 + 4] = xw[4]; 797 x[i2 + 5] = xw[5]; 798 x[i2 + 6] = xw[6]; 799 idiag -= 49; 800 i2 -= 7; 801 } 802 break; 803 default: 804 PetscCall(PetscArraycpy(w, xb + i2, bs)); 805 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 806 i2 -= bs; 807 idiag -= bs2; 808 for (i = m - 2; i >= 0; i--) { 809 v = aa + bs2 * (diag[i] + 1); 810 vi = aj + diag[i] + 1; 811 nz = ai[i + 1] - diag[i] - 1; 812 813 PetscCall(PetscArraycpy(w, xb + i2, bs)); 814 /* copy all rows of x that are needed into contiguous space */ 815 workt = work; 816 for (j = 0; j < nz; j++) { 817 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 818 workt += bs; 819 } 820 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 821 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 822 823 idiag -= bs2; 824 i2 -= bs; 825 } 826 break; 827 } 828 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 829 } 830 its--; 831 } 832 while (its--) { 833 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 834 idiag = a->idiag; 835 i2 = 0; 836 switch (bs) { 837 case 1: 838 for (i = 0; i < m; i++) { 839 v = aa + ai[i]; 840 vi = aj + ai[i]; 841 nz = ai[i + 1] - ai[i]; 842 s[0] = b[i2]; 843 for (j = 0; j < nz; j++) { 844 xw[0] = x[vi[j]]; 845 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 846 } 847 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 848 x[i2] += xw[0]; 849 idiag += 1; 850 i2 += 1; 851 } 852 break; 853 case 2: 854 for (i = 0; i < m; i++) { 855 v = aa + 4 * ai[i]; 856 vi = aj + ai[i]; 857 nz = ai[i + 1] - ai[i]; 858 s[0] = b[i2]; 859 s[1] = b[i2 + 1]; 860 for (j = 0; j < nz; j++) { 861 idx = 2 * vi[j]; 862 it = 4 * j; 863 xw[0] = x[idx]; 864 xw[1] = x[1 + idx]; 865 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 866 } 867 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 868 x[i2] += xw[0]; 869 x[i2 + 1] += xw[1]; 870 idiag += 4; 871 i2 += 2; 872 } 873 break; 874 case 3: 875 for (i = 0; i < m; i++) { 876 v = aa + 9 * ai[i]; 877 vi = aj + ai[i]; 878 nz = ai[i + 1] - ai[i]; 879 s[0] = b[i2]; 880 s[1] = b[i2 + 1]; 881 s[2] = b[i2 + 2]; 882 while (nz--) { 883 idx = 3 * (*vi++); 884 xw[0] = x[idx]; 885 xw[1] = x[1 + idx]; 886 xw[2] = x[2 + idx]; 887 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 888 v += 9; 889 } 890 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 891 x[i2] += xw[0]; 892 x[i2 + 1] += xw[1]; 893 x[i2 + 2] += xw[2]; 894 idiag += 9; 895 i2 += 3; 896 } 897 break; 898 case 4: 899 for (i = 0; i < m; i++) { 900 v = aa + 16 * ai[i]; 901 vi = aj + ai[i]; 902 nz = ai[i + 1] - ai[i]; 903 s[0] = b[i2]; 904 s[1] = b[i2 + 1]; 905 s[2] = b[i2 + 2]; 906 s[3] = b[i2 + 3]; 907 while (nz--) { 908 idx = 4 * (*vi++); 909 xw[0] = x[idx]; 910 xw[1] = x[1 + idx]; 911 xw[2] = x[2 + idx]; 912 xw[3] = x[3 + idx]; 913 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 914 v += 16; 915 } 916 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 917 x[i2] += xw[0]; 918 x[i2 + 1] += xw[1]; 919 x[i2 + 2] += xw[2]; 920 x[i2 + 3] += xw[3]; 921 idiag += 16; 922 i2 += 4; 923 } 924 break; 925 case 5: 926 for (i = 0; i < m; i++) { 927 v = aa + 25 * ai[i]; 928 vi = aj + ai[i]; 929 nz = ai[i + 1] - ai[i]; 930 s[0] = b[i2]; 931 s[1] = b[i2 + 1]; 932 s[2] = b[i2 + 2]; 933 s[3] = b[i2 + 3]; 934 s[4] = b[i2 + 4]; 935 while (nz--) { 936 idx = 5 * (*vi++); 937 xw[0] = x[idx]; 938 xw[1] = x[1 + idx]; 939 xw[2] = x[2 + idx]; 940 xw[3] = x[3 + idx]; 941 xw[4] = x[4 + idx]; 942 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 943 v += 25; 944 } 945 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 946 x[i2] += xw[0]; 947 x[i2 + 1] += xw[1]; 948 x[i2 + 2] += xw[2]; 949 x[i2 + 3] += xw[3]; 950 x[i2 + 4] += xw[4]; 951 idiag += 25; 952 i2 += 5; 953 } 954 break; 955 case 6: 956 for (i = 0; i < m; i++) { 957 v = aa + 36 * ai[i]; 958 vi = aj + ai[i]; 959 nz = ai[i + 1] - ai[i]; 960 s[0] = b[i2]; 961 s[1] = b[i2 + 1]; 962 s[2] = b[i2 + 2]; 963 s[3] = b[i2 + 3]; 964 s[4] = b[i2 + 4]; 965 s[5] = b[i2 + 5]; 966 while (nz--) { 967 idx = 6 * (*vi++); 968 xw[0] = x[idx]; 969 xw[1] = x[1 + idx]; 970 xw[2] = x[2 + idx]; 971 xw[3] = x[3 + idx]; 972 xw[4] = x[4 + idx]; 973 xw[5] = x[5 + idx]; 974 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 975 v += 36; 976 } 977 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 978 x[i2] += xw[0]; 979 x[i2 + 1] += xw[1]; 980 x[i2 + 2] += xw[2]; 981 x[i2 + 3] += xw[3]; 982 x[i2 + 4] += xw[4]; 983 x[i2 + 5] += xw[5]; 984 idiag += 36; 985 i2 += 6; 986 } 987 break; 988 case 7: 989 for (i = 0; i < m; i++) { 990 v = aa + 49 * ai[i]; 991 vi = aj + ai[i]; 992 nz = ai[i + 1] - ai[i]; 993 s[0] = b[i2]; 994 s[1] = b[i2 + 1]; 995 s[2] = b[i2 + 2]; 996 s[3] = b[i2 + 3]; 997 s[4] = b[i2 + 4]; 998 s[5] = b[i2 + 5]; 999 s[6] = b[i2 + 6]; 1000 while (nz--) { 1001 idx = 7 * (*vi++); 1002 xw[0] = x[idx]; 1003 xw[1] = x[1 + idx]; 1004 xw[2] = x[2 + idx]; 1005 xw[3] = x[3 + idx]; 1006 xw[4] = x[4 + idx]; 1007 xw[5] = x[5 + idx]; 1008 xw[6] = x[6 + idx]; 1009 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1010 v += 49; 1011 } 1012 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1013 x[i2] += xw[0]; 1014 x[i2 + 1] += xw[1]; 1015 x[i2 + 2] += xw[2]; 1016 x[i2 + 3] += xw[3]; 1017 x[i2 + 4] += xw[4]; 1018 x[i2 + 5] += xw[5]; 1019 x[i2 + 6] += xw[6]; 1020 idiag += 49; 1021 i2 += 7; 1022 } 1023 break; 1024 default: 1025 for (i = 0; i < m; i++) { 1026 v = aa + bs2 * ai[i]; 1027 vi = aj + ai[i]; 1028 nz = ai[i + 1] - ai[i]; 1029 1030 PetscCall(PetscArraycpy(w, b + i2, bs)); 1031 /* copy all rows of x that are needed into contiguous space */ 1032 workt = work; 1033 for (j = 0; j < nz; j++) { 1034 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1035 workt += bs; 1036 } 1037 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1038 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1039 1040 idiag += bs2; 1041 i2 += bs; 1042 } 1043 break; 1044 } 1045 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1046 } 1047 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1048 idiag = a->idiag + bs2 * (a->mbs - 1); 1049 i2 = bs * (m - 1); 1050 switch (bs) { 1051 case 1: 1052 for (i = m - 1; i >= 0; i--) { 1053 v = aa + ai[i]; 1054 vi = aj + ai[i]; 1055 nz = ai[i + 1] - ai[i]; 1056 s[0] = b[i2]; 1057 for (j = 0; j < nz; j++) { 1058 xw[0] = x[vi[j]]; 1059 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1060 } 1061 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1062 x[i2] += xw[0]; 1063 idiag -= 1; 1064 i2 -= 1; 1065 } 1066 break; 1067 case 2: 1068 for (i = m - 1; i >= 0; i--) { 1069 v = aa + 4 * ai[i]; 1070 vi = aj + ai[i]; 1071 nz = ai[i + 1] - ai[i]; 1072 s[0] = b[i2]; 1073 s[1] = b[i2 + 1]; 1074 for (j = 0; j < nz; j++) { 1075 idx = 2 * vi[j]; 1076 it = 4 * j; 1077 xw[0] = x[idx]; 1078 xw[1] = x[1 + idx]; 1079 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1080 } 1081 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1082 x[i2] += xw[0]; 1083 x[i2 + 1] += xw[1]; 1084 idiag -= 4; 1085 i2 -= 2; 1086 } 1087 break; 1088 case 3: 1089 for (i = m - 1; i >= 0; i--) { 1090 v = aa + 9 * ai[i]; 1091 vi = aj + ai[i]; 1092 nz = ai[i + 1] - ai[i]; 1093 s[0] = b[i2]; 1094 s[1] = b[i2 + 1]; 1095 s[2] = b[i2 + 2]; 1096 while (nz--) { 1097 idx = 3 * (*vi++); 1098 xw[0] = x[idx]; 1099 xw[1] = x[1 + idx]; 1100 xw[2] = x[2 + idx]; 1101 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1102 v += 9; 1103 } 1104 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1105 x[i2] += xw[0]; 1106 x[i2 + 1] += xw[1]; 1107 x[i2 + 2] += xw[2]; 1108 idiag -= 9; 1109 i2 -= 3; 1110 } 1111 break; 1112 case 4: 1113 for (i = m - 1; i >= 0; i--) { 1114 v = aa + 16 * ai[i]; 1115 vi = aj + ai[i]; 1116 nz = ai[i + 1] - ai[i]; 1117 s[0] = b[i2]; 1118 s[1] = b[i2 + 1]; 1119 s[2] = b[i2 + 2]; 1120 s[3] = b[i2 + 3]; 1121 while (nz--) { 1122 idx = 4 * (*vi++); 1123 xw[0] = x[idx]; 1124 xw[1] = x[1 + idx]; 1125 xw[2] = x[2 + idx]; 1126 xw[3] = x[3 + idx]; 1127 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1128 v += 16; 1129 } 1130 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1131 x[i2] += xw[0]; 1132 x[i2 + 1] += xw[1]; 1133 x[i2 + 2] += xw[2]; 1134 x[i2 + 3] += xw[3]; 1135 idiag -= 16; 1136 i2 -= 4; 1137 } 1138 break; 1139 case 5: 1140 for (i = m - 1; i >= 0; i--) { 1141 v = aa + 25 * ai[i]; 1142 vi = aj + ai[i]; 1143 nz = ai[i + 1] - ai[i]; 1144 s[0] = b[i2]; 1145 s[1] = b[i2 + 1]; 1146 s[2] = b[i2 + 2]; 1147 s[3] = b[i2 + 3]; 1148 s[4] = b[i2 + 4]; 1149 while (nz--) { 1150 idx = 5 * (*vi++); 1151 xw[0] = x[idx]; 1152 xw[1] = x[1 + idx]; 1153 xw[2] = x[2 + idx]; 1154 xw[3] = x[3 + idx]; 1155 xw[4] = x[4 + idx]; 1156 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1157 v += 25; 1158 } 1159 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1160 x[i2] += xw[0]; 1161 x[i2 + 1] += xw[1]; 1162 x[i2 + 2] += xw[2]; 1163 x[i2 + 3] += xw[3]; 1164 x[i2 + 4] += xw[4]; 1165 idiag -= 25; 1166 i2 -= 5; 1167 } 1168 break; 1169 case 6: 1170 for (i = m - 1; i >= 0; i--) { 1171 v = aa + 36 * ai[i]; 1172 vi = aj + ai[i]; 1173 nz = ai[i + 1] - ai[i]; 1174 s[0] = b[i2]; 1175 s[1] = b[i2 + 1]; 1176 s[2] = b[i2 + 2]; 1177 s[3] = b[i2 + 3]; 1178 s[4] = b[i2 + 4]; 1179 s[5] = b[i2 + 5]; 1180 while (nz--) { 1181 idx = 6 * (*vi++); 1182 xw[0] = x[idx]; 1183 xw[1] = x[1 + idx]; 1184 xw[2] = x[2 + idx]; 1185 xw[3] = x[3 + idx]; 1186 xw[4] = x[4 + idx]; 1187 xw[5] = x[5 + idx]; 1188 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1189 v += 36; 1190 } 1191 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1192 x[i2] += xw[0]; 1193 x[i2 + 1] += xw[1]; 1194 x[i2 + 2] += xw[2]; 1195 x[i2 + 3] += xw[3]; 1196 x[i2 + 4] += xw[4]; 1197 x[i2 + 5] += xw[5]; 1198 idiag -= 36; 1199 i2 -= 6; 1200 } 1201 break; 1202 case 7: 1203 for (i = m - 1; i >= 0; i--) { 1204 v = aa + 49 * ai[i]; 1205 vi = aj + ai[i]; 1206 nz = ai[i + 1] - ai[i]; 1207 s[0] = b[i2]; 1208 s[1] = b[i2 + 1]; 1209 s[2] = b[i2 + 2]; 1210 s[3] = b[i2 + 3]; 1211 s[4] = b[i2 + 4]; 1212 s[5] = b[i2 + 5]; 1213 s[6] = b[i2 + 6]; 1214 while (nz--) { 1215 idx = 7 * (*vi++); 1216 xw[0] = x[idx]; 1217 xw[1] = x[1 + idx]; 1218 xw[2] = x[2 + idx]; 1219 xw[3] = x[3 + idx]; 1220 xw[4] = x[4 + idx]; 1221 xw[5] = x[5 + idx]; 1222 xw[6] = x[6 + idx]; 1223 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1224 v += 49; 1225 } 1226 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1227 x[i2] += xw[0]; 1228 x[i2 + 1] += xw[1]; 1229 x[i2 + 2] += xw[2]; 1230 x[i2 + 3] += xw[3]; 1231 x[i2 + 4] += xw[4]; 1232 x[i2 + 5] += xw[5]; 1233 x[i2 + 6] += xw[6]; 1234 idiag -= 49; 1235 i2 -= 7; 1236 } 1237 break; 1238 default: 1239 for (i = m - 1; i >= 0; i--) { 1240 v = aa + bs2 * ai[i]; 1241 vi = aj + ai[i]; 1242 nz = ai[i + 1] - ai[i]; 1243 1244 PetscCall(PetscArraycpy(w, b + i2, bs)); 1245 /* copy all rows of x that are needed into contiguous space */ 1246 workt = work; 1247 for (j = 0; j < nz; j++) { 1248 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1249 workt += bs; 1250 } 1251 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1252 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1253 1254 idiag -= bs2; 1255 i2 -= bs; 1256 } 1257 break; 1258 } 1259 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1260 } 1261 } 1262 PetscCall(VecRestoreArray(xx, &x)); 1263 PetscCall(VecRestoreArrayRead(bb, &b)); 1264 PetscFunctionReturn(0); 1265 } 1266 1267 /* 1268 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1269 */ 1270 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1271 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1272 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1273 #define matsetvaluesblocked4_ matsetvaluesblocked4 1274 #endif 1275 1276 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1277 { 1278 Mat A = *AA; 1279 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1280 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1281 PetscInt *ai = a->i, *ailen = a->ilen; 1282 PetscInt *aj = a->j, stepval, lastcol = -1; 1283 const PetscScalar *value = v; 1284 MatScalar *ap, *aa = a->a, *bap; 1285 1286 PetscFunctionBegin; 1287 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1288 stepval = (n - 1) * 4; 1289 for (k = 0; k < m; k++) { /* loop over added rows */ 1290 row = im[k]; 1291 rp = aj + ai[row]; 1292 ap = aa + 16 * ai[row]; 1293 nrow = ailen[row]; 1294 low = 0; 1295 high = nrow; 1296 for (l = 0; l < n; l++) { /* loop over added columns */ 1297 col = in[l]; 1298 if (col <= lastcol) low = 0; 1299 else high = nrow; 1300 lastcol = col; 1301 value = v + k * (stepval + 4 + l) * 4; 1302 while (high - low > 7) { 1303 t = (low + high) / 2; 1304 if (rp[t] > col) high = t; 1305 else low = t; 1306 } 1307 for (i = low; i < high; i++) { 1308 if (rp[i] > col) break; 1309 if (rp[i] == col) { 1310 bap = ap + 16 * i; 1311 for (ii = 0; ii < 4; ii++, value += stepval) { 1312 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1313 } 1314 goto noinsert2; 1315 } 1316 } 1317 N = nrow++ - 1; 1318 high++; /* added new column index thus must search to one higher than before */ 1319 /* shift up all the later entries in this row */ 1320 for (ii = N; ii >= i; ii--) { 1321 rp[ii + 1] = rp[ii]; 1322 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1323 } 1324 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1325 rp[i] = col; 1326 bap = ap + 16 * i; 1327 for (ii = 0; ii < 4; ii++, value += stepval) { 1328 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1329 } 1330 noinsert2:; 1331 low = i; 1332 } 1333 ailen[row] = nrow; 1334 } 1335 PetscFunctionReturnVoid(); 1336 } 1337 1338 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1339 #define matsetvalues4_ MATSETVALUES4 1340 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1341 #define matsetvalues4_ matsetvalues4 1342 #endif 1343 1344 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1345 { 1346 Mat A = *AA; 1347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1348 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1349 PetscInt *ai = a->i, *ailen = a->ilen; 1350 PetscInt *aj = a->j, brow, bcol; 1351 PetscInt ridx, cidx, lastcol = -1; 1352 MatScalar *ap, value, *aa = a->a, *bap; 1353 1354 PetscFunctionBegin; 1355 for (k = 0; k < m; k++) { /* loop over added rows */ 1356 row = im[k]; 1357 brow = row / 4; 1358 rp = aj + ai[brow]; 1359 ap = aa + 16 * ai[brow]; 1360 nrow = ailen[brow]; 1361 low = 0; 1362 high = nrow; 1363 for (l = 0; l < n; l++) { /* loop over added columns */ 1364 col = in[l]; 1365 bcol = col / 4; 1366 ridx = row % 4; 1367 cidx = col % 4; 1368 value = v[l + k * n]; 1369 if (col <= lastcol) low = 0; 1370 else high = nrow; 1371 lastcol = col; 1372 while (high - low > 7) { 1373 t = (low + high) / 2; 1374 if (rp[t] > bcol) high = t; 1375 else low = t; 1376 } 1377 for (i = low; i < high; i++) { 1378 if (rp[i] > bcol) break; 1379 if (rp[i] == bcol) { 1380 bap = ap + 16 * i + 4 * cidx + ridx; 1381 *bap += value; 1382 goto noinsert1; 1383 } 1384 } 1385 N = nrow++ - 1; 1386 high++; /* added new column thus must search to one higher than before */ 1387 /* shift up all the later entries in this row */ 1388 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1389 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1390 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1391 rp[i] = bcol; 1392 ap[16 * i + 4 * cidx + ridx] = value; 1393 noinsert1:; 1394 low = i; 1395 } 1396 ailen[brow] = nrow; 1397 } 1398 PetscFunctionReturnVoid(); 1399 } 1400 1401 /* 1402 Checks for missing diagonals 1403 */ 1404 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1405 { 1406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1407 PetscInt *diag, *ii = a->i, i; 1408 1409 PetscFunctionBegin; 1410 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1411 *missing = PETSC_FALSE; 1412 if (A->rmap->n > 0 && !ii) { 1413 *missing = PETSC_TRUE; 1414 if (d) *d = 0; 1415 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1416 } else { 1417 PetscInt n; 1418 n = PetscMin(a->mbs, a->nbs); 1419 diag = a->diag; 1420 for (i = 0; i < n; i++) { 1421 if (diag[i] >= ii[i + 1]) { 1422 *missing = PETSC_TRUE; 1423 if (d) *d = i; 1424 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1425 break; 1426 } 1427 } 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1433 { 1434 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1435 PetscInt i, j, m = a->mbs; 1436 1437 PetscFunctionBegin; 1438 if (!a->diag) { 1439 PetscCall(PetscMalloc1(m, &a->diag)); 1440 a->free_diag = PETSC_TRUE; 1441 } 1442 for (i = 0; i < m; i++) { 1443 a->diag[i] = a->i[i + 1]; 1444 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1445 if (a->j[j] == i) { 1446 a->diag[i] = j; 1447 break; 1448 } 1449 } 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1455 { 1456 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1457 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1458 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1459 1460 PetscFunctionBegin; 1461 *nn = n; 1462 if (!ia) PetscFunctionReturn(0); 1463 if (symmetric) { 1464 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1465 nz = tia[n]; 1466 } else { 1467 tia = a->i; 1468 tja = a->j; 1469 } 1470 1471 if (!blockcompressed && bs > 1) { 1472 (*nn) *= bs; 1473 /* malloc & create the natural set of indices */ 1474 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1475 if (n) { 1476 (*ia)[0] = oshift; 1477 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1478 } 1479 1480 for (i = 1; i < n; i++) { 1481 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1482 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1483 } 1484 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1485 1486 if (inja) { 1487 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1488 cnt = 0; 1489 for (i = 0; i < n; i++) { 1490 for (j = 0; j < bs; j++) { 1491 for (k = tia[i]; k < tia[i + 1]; k++) { 1492 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1493 } 1494 } 1495 } 1496 } 1497 1498 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1499 PetscCall(PetscFree(tia)); 1500 PetscCall(PetscFree(tja)); 1501 } 1502 } else if (oshift == 1) { 1503 if (symmetric) { 1504 nz = tia[A->rmap->n / bs]; 1505 /* add 1 to i and j indices */ 1506 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1507 *ia = tia; 1508 if (ja) { 1509 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1510 *ja = tja; 1511 } 1512 } else { 1513 nz = a->i[A->rmap->n / bs]; 1514 /* malloc space and add 1 to i and j indices */ 1515 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1516 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1517 if (ja) { 1518 PetscCall(PetscMalloc1(nz, ja)); 1519 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1520 } 1521 } 1522 } else { 1523 *ia = tia; 1524 if (ja) *ja = tja; 1525 } 1526 PetscFunctionReturn(0); 1527 } 1528 1529 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1530 { 1531 PetscFunctionBegin; 1532 if (!ia) PetscFunctionReturn(0); 1533 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1534 PetscCall(PetscFree(*ia)); 1535 if (ja) PetscCall(PetscFree(*ja)); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1541 { 1542 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1543 1544 PetscFunctionBegin; 1545 #if defined(PETSC_USE_LOG) 1546 PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz); 1547 #endif 1548 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1549 PetscCall(ISDestroy(&a->row)); 1550 PetscCall(ISDestroy(&a->col)); 1551 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1552 PetscCall(PetscFree(a->idiag)); 1553 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1554 PetscCall(PetscFree(a->solve_work)); 1555 PetscCall(PetscFree(a->mult_work)); 1556 PetscCall(PetscFree(a->sor_workt)); 1557 PetscCall(PetscFree(a->sor_work)); 1558 PetscCall(ISDestroy(&a->icol)); 1559 PetscCall(PetscFree(a->saved_values)); 1560 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1561 1562 PetscCall(MatDestroy(&a->sbaijMat)); 1563 PetscCall(MatDestroy(&a->parent)); 1564 PetscCall(PetscFree(A->data)); 1565 1566 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1567 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1568 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1569 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1570 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1571 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1572 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1573 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1574 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1575 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1576 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1577 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1578 #if defined(PETSC_HAVE_HYPRE) 1579 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1580 #endif 1581 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1582 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1587 { 1588 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1589 1590 PetscFunctionBegin; 1591 switch (op) { 1592 case MAT_ROW_ORIENTED: 1593 a->roworiented = flg; 1594 break; 1595 case MAT_KEEP_NONZERO_PATTERN: 1596 a->keepnonzeropattern = flg; 1597 break; 1598 case MAT_NEW_NONZERO_LOCATIONS: 1599 a->nonew = (flg ? 0 : 1); 1600 break; 1601 case MAT_NEW_NONZERO_LOCATION_ERR: 1602 a->nonew = (flg ? -1 : 0); 1603 break; 1604 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1605 a->nonew = (flg ? -2 : 0); 1606 break; 1607 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1608 a->nounused = (flg ? -1 : 0); 1609 break; 1610 case MAT_FORCE_DIAGONAL_ENTRIES: 1611 case MAT_IGNORE_OFF_PROC_ENTRIES: 1612 case MAT_USE_HASH_TABLE: 1613 case MAT_SORTED_FULL: 1614 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1615 break; 1616 case MAT_SPD: 1617 case MAT_SYMMETRIC: 1618 case MAT_STRUCTURALLY_SYMMETRIC: 1619 case MAT_HERMITIAN: 1620 case MAT_SYMMETRY_ETERNAL: 1621 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1622 case MAT_SUBMAT_SINGLEIS: 1623 case MAT_STRUCTURE_ONLY: 1624 case MAT_SPD_ETERNAL: 1625 /* if the diagonal matrix is square it inherits some of the properties above */ 1626 break; 1627 default: 1628 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1629 } 1630 PetscFunctionReturn(0); 1631 } 1632 1633 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1634 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1635 { 1636 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1637 MatScalar *aa_i; 1638 PetscScalar *v_i; 1639 1640 PetscFunctionBegin; 1641 bs = A->rmap->bs; 1642 bs2 = bs * bs; 1643 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1644 1645 bn = row / bs; /* Block number */ 1646 bp = row % bs; /* Block Position */ 1647 M = ai[bn + 1] - ai[bn]; 1648 *nz = bs * M; 1649 1650 if (v) { 1651 *v = NULL; 1652 if (*nz) { 1653 PetscCall(PetscMalloc1(*nz, v)); 1654 for (i = 0; i < M; i++) { /* for each block in the block row */ 1655 v_i = *v + i * bs; 1656 aa_i = aa + bs2 * (ai[bn] + i); 1657 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1658 } 1659 } 1660 } 1661 1662 if (idx) { 1663 *idx = NULL; 1664 if (*nz) { 1665 PetscCall(PetscMalloc1(*nz, idx)); 1666 for (i = 0; i < M; i++) { /* for each block in the block row */ 1667 idx_i = *idx + i * bs; 1668 itmp = bs * aj[ai[bn] + i]; 1669 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1670 } 1671 } 1672 } 1673 PetscFunctionReturn(0); 1674 } 1675 1676 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1677 { 1678 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1679 1680 PetscFunctionBegin; 1681 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1682 PetscFunctionReturn(0); 1683 } 1684 1685 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1686 { 1687 PetscFunctionBegin; 1688 if (nz) *nz = 0; 1689 if (idx) PetscCall(PetscFree(*idx)); 1690 if (v) PetscCall(PetscFree(*v)); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1695 { 1696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1697 Mat C; 1698 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1699 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1700 MatScalar *ata, *aa = a->a; 1701 1702 PetscFunctionBegin; 1703 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1704 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1705 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1706 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1707 1708 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1709 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1710 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1711 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1712 1713 at = (Mat_SeqBAIJ *)C->data; 1714 ati = at->i; 1715 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1716 } else { 1717 C = *B; 1718 at = (Mat_SeqBAIJ *)C->data; 1719 ati = at->i; 1720 } 1721 1722 atj = at->j; 1723 ata = at->a; 1724 1725 /* Copy ati into atfill so we have locations of the next free space in atj */ 1726 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1727 1728 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1729 for (i = 0; i < mbs; i++) { 1730 anzj = ai[i + 1] - ai[i]; 1731 for (j = 0; j < anzj; j++) { 1732 atj[atfill[*aj]] = i; 1733 for (kr = 0; kr < bs; kr++) { 1734 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1735 } 1736 atfill[*aj++] += 1; 1737 } 1738 } 1739 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1740 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1741 1742 /* Clean up temporary space and complete requests. */ 1743 PetscCall(PetscFree(atfill)); 1744 1745 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1746 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1747 *B = C; 1748 } else { 1749 PetscCall(MatHeaderMerge(A, &C)); 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1755 { 1756 Mat Btrans; 1757 1758 PetscFunctionBegin; 1759 *f = PETSC_FALSE; 1760 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1761 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1762 PetscCall(MatDestroy(&Btrans)); 1763 PetscFunctionReturn(0); 1764 } 1765 1766 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1767 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1768 { 1769 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1770 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1771 PetscInt *rowlens, *colidxs; 1772 PetscScalar *matvals; 1773 1774 PetscFunctionBegin; 1775 PetscCall(PetscViewerSetUp(viewer)); 1776 1777 M = mat->rmap->N; 1778 N = mat->cmap->N; 1779 m = mat->rmap->n; 1780 bs = mat->rmap->bs; 1781 nz = bs * bs * A->nz; 1782 1783 /* write matrix header */ 1784 header[0] = MAT_FILE_CLASSID; 1785 header[1] = M; 1786 header[2] = N; 1787 header[3] = nz; 1788 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1789 1790 /* store row lengths */ 1791 PetscCall(PetscMalloc1(m, &rowlens)); 1792 for (cnt = 0, i = 0; i < A->mbs; i++) 1793 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1794 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1795 PetscCall(PetscFree(rowlens)); 1796 1797 /* store column indices */ 1798 PetscCall(PetscMalloc1(nz, &colidxs)); 1799 for (cnt = 0, i = 0; i < A->mbs; i++) 1800 for (k = 0; k < bs; k++) 1801 for (j = A->i[i]; j < A->i[i + 1]; j++) 1802 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1803 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1804 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1805 PetscCall(PetscFree(colidxs)); 1806 1807 /* store nonzero values */ 1808 PetscCall(PetscMalloc1(nz, &matvals)); 1809 for (cnt = 0, i = 0; i < A->mbs; i++) 1810 for (k = 0; k < bs; k++) 1811 for (j = A->i[i]; j < A->i[i + 1]; j++) 1812 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1813 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1814 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1815 PetscCall(PetscFree(matvals)); 1816 1817 /* write block size option to the viewer's .info file */ 1818 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1819 PetscFunctionReturn(0); 1820 } 1821 1822 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1823 { 1824 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1825 PetscInt i, bs = A->rmap->bs, k; 1826 1827 PetscFunctionBegin; 1828 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1829 for (i = 0; i < a->mbs; i++) { 1830 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1831 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1832 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1833 } 1834 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1835 PetscFunctionReturn(0); 1836 } 1837 1838 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1839 { 1840 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1841 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1842 PetscViewerFormat format; 1843 1844 PetscFunctionBegin; 1845 if (A->structure_only) { 1846 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1847 PetscFunctionReturn(0); 1848 } 1849 1850 PetscCall(PetscViewerGetFormat(viewer, &format)); 1851 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1852 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1853 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1854 const char *matname; 1855 Mat aij; 1856 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1857 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1858 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1859 PetscCall(MatView(aij, viewer)); 1860 PetscCall(MatDestroy(&aij)); 1861 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1862 PetscFunctionReturn(0); 1863 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1864 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1865 for (i = 0; i < a->mbs; i++) { 1866 for (j = 0; j < bs; j++) { 1867 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1868 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1869 for (l = 0; l < bs; l++) { 1870 #if defined(PETSC_USE_COMPLEX) 1871 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1872 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1873 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1874 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1875 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1876 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1877 } 1878 #else 1879 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1880 #endif 1881 } 1882 } 1883 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1884 } 1885 } 1886 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1887 } else { 1888 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1889 for (i = 0; i < a->mbs; i++) { 1890 for (j = 0; j < bs; j++) { 1891 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1892 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1893 for (l = 0; l < bs; l++) { 1894 #if defined(PETSC_USE_COMPLEX) 1895 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1896 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1897 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1898 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1899 } else { 1900 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1901 } 1902 #else 1903 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1904 #endif 1905 } 1906 } 1907 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1908 } 1909 } 1910 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1911 } 1912 PetscCall(PetscViewerFlush(viewer)); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 #include <petscdraw.h> 1917 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1918 { 1919 Mat A = (Mat)Aa; 1920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1921 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1922 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1923 MatScalar *aa; 1924 PetscViewer viewer; 1925 PetscViewerFormat format; 1926 1927 PetscFunctionBegin; 1928 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1929 PetscCall(PetscViewerGetFormat(viewer, &format)); 1930 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1931 1932 /* loop over matrix elements drawing boxes */ 1933 1934 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1935 PetscDrawCollectiveBegin(draw); 1936 /* Blue for negative, Cyan for zero and Red for positive */ 1937 color = PETSC_DRAW_BLUE; 1938 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1939 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1940 y_l = A->rmap->N - row - 1.0; 1941 y_r = y_l + 1.0; 1942 x_l = a->j[j] * bs; 1943 x_r = x_l + 1.0; 1944 aa = a->a + j * bs2; 1945 for (k = 0; k < bs; k++) { 1946 for (l = 0; l < bs; l++) { 1947 if (PetscRealPart(*aa++) >= 0.) continue; 1948 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1949 } 1950 } 1951 } 1952 } 1953 color = PETSC_DRAW_CYAN; 1954 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1955 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1956 y_l = A->rmap->N - row - 1.0; 1957 y_r = y_l + 1.0; 1958 x_l = a->j[j] * bs; 1959 x_r = x_l + 1.0; 1960 aa = a->a + j * bs2; 1961 for (k = 0; k < bs; k++) { 1962 for (l = 0; l < bs; l++) { 1963 if (PetscRealPart(*aa++) != 0.) continue; 1964 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1965 } 1966 } 1967 } 1968 } 1969 color = PETSC_DRAW_RED; 1970 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1971 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1972 y_l = A->rmap->N - row - 1.0; 1973 y_r = y_l + 1.0; 1974 x_l = a->j[j] * bs; 1975 x_r = x_l + 1.0; 1976 aa = a->a + j * bs2; 1977 for (k = 0; k < bs; k++) { 1978 for (l = 0; l < bs; l++) { 1979 if (PetscRealPart(*aa++) <= 0.) continue; 1980 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1981 } 1982 } 1983 } 1984 } 1985 PetscDrawCollectiveEnd(draw); 1986 } else { 1987 /* use contour shading to indicate magnitude of values */ 1988 /* first determine max of all nonzero values */ 1989 PetscReal minv = 0.0, maxv = 0.0; 1990 PetscDraw popup; 1991 1992 for (i = 0; i < a->nz * a->bs2; i++) { 1993 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1994 } 1995 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1996 PetscCall(PetscDrawGetPopup(draw, &popup)); 1997 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 1998 1999 PetscDrawCollectiveBegin(draw); 2000 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2001 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2002 y_l = A->rmap->N - row - 1.0; 2003 y_r = y_l + 1.0; 2004 x_l = a->j[j] * bs; 2005 x_r = x_l + 1.0; 2006 aa = a->a + j * bs2; 2007 for (k = 0; k < bs; k++) { 2008 for (l = 0; l < bs; l++) { 2009 MatScalar v = *aa++; 2010 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2011 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2012 } 2013 } 2014 } 2015 } 2016 PetscDrawCollectiveEnd(draw); 2017 } 2018 PetscFunctionReturn(0); 2019 } 2020 2021 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2022 { 2023 PetscReal xl, yl, xr, yr, w, h; 2024 PetscDraw draw; 2025 PetscBool isnull; 2026 2027 PetscFunctionBegin; 2028 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2029 PetscCall(PetscDrawIsNull(draw, &isnull)); 2030 if (isnull) PetscFunctionReturn(0); 2031 2032 xr = A->cmap->n; 2033 yr = A->rmap->N; 2034 h = yr / 10.0; 2035 w = xr / 10.0; 2036 xr += w; 2037 yr += h; 2038 xl = -w; 2039 yl = -h; 2040 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2041 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2042 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2043 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2044 PetscCall(PetscDrawSave(draw)); 2045 PetscFunctionReturn(0); 2046 } 2047 2048 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2049 { 2050 PetscBool iascii, isbinary, isdraw; 2051 2052 PetscFunctionBegin; 2053 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2054 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2055 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2056 if (iascii) { 2057 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2058 } else if (isbinary) { 2059 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2060 } else if (isdraw) { 2061 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2062 } else { 2063 Mat B; 2064 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2065 PetscCall(MatView(B, viewer)); 2066 PetscCall(MatDestroy(&B)); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2072 { 2073 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2074 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2075 PetscInt *ai = a->i, *ailen = a->ilen; 2076 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2077 MatScalar *ap, *aa = a->a; 2078 2079 PetscFunctionBegin; 2080 for (k = 0; k < m; k++) { /* loop over rows */ 2081 row = im[k]; 2082 brow = row / bs; 2083 if (row < 0) { 2084 v += n; 2085 continue; 2086 } /* negative row */ 2087 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2088 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2089 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2090 nrow = ailen[brow]; 2091 for (l = 0; l < n; l++) { /* loop over columns */ 2092 if (in[l] < 0) { 2093 v++; 2094 continue; 2095 } /* negative column */ 2096 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2097 col = in[l]; 2098 bcol = col / bs; 2099 cidx = col % bs; 2100 ridx = row % bs; 2101 high = nrow; 2102 low = 0; /* assume unsorted */ 2103 while (high - low > 5) { 2104 t = (low + high) / 2; 2105 if (rp[t] > bcol) high = t; 2106 else low = t; 2107 } 2108 for (i = low; i < high; i++) { 2109 if (rp[i] > bcol) break; 2110 if (rp[i] == bcol) { 2111 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2112 goto finished; 2113 } 2114 } 2115 *v++ = 0.0; 2116 finished:; 2117 } 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2123 { 2124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2125 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2126 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2127 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2128 PetscBool roworiented = a->roworiented; 2129 const PetscScalar *value = v; 2130 MatScalar *ap = NULL, *aa = a->a, *bap; 2131 2132 PetscFunctionBegin; 2133 if (roworiented) { 2134 stepval = (n - 1) * bs; 2135 } else { 2136 stepval = (m - 1) * bs; 2137 } 2138 for (k = 0; k < m; k++) { /* loop over added rows */ 2139 row = im[k]; 2140 if (row < 0) continue; 2141 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2142 rp = aj + ai[row]; 2143 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2144 rmax = imax[row]; 2145 nrow = ailen[row]; 2146 low = 0; 2147 high = nrow; 2148 for (l = 0; l < n; l++) { /* loop over added columns */ 2149 if (in[l] < 0) continue; 2150 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2151 col = in[l]; 2152 if (!A->structure_only) { 2153 if (roworiented) { 2154 value = v + (k * (stepval + bs) + l) * bs; 2155 } else { 2156 value = v + (l * (stepval + bs) + k) * bs; 2157 } 2158 } 2159 if (col <= lastcol) low = 0; 2160 else high = nrow; 2161 lastcol = col; 2162 while (high - low > 7) { 2163 t = (low + high) / 2; 2164 if (rp[t] > col) high = t; 2165 else low = t; 2166 } 2167 for (i = low; i < high; i++) { 2168 if (rp[i] > col) break; 2169 if (rp[i] == col) { 2170 if (A->structure_only) goto noinsert2; 2171 bap = ap + bs2 * i; 2172 if (roworiented) { 2173 if (is == ADD_VALUES) { 2174 for (ii = 0; ii < bs; ii++, value += stepval) { 2175 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2176 } 2177 } else { 2178 for (ii = 0; ii < bs; ii++, value += stepval) { 2179 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2180 } 2181 } 2182 } else { 2183 if (is == ADD_VALUES) { 2184 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2185 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2186 bap += bs; 2187 } 2188 } else { 2189 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2190 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2191 bap += bs; 2192 } 2193 } 2194 } 2195 goto noinsert2; 2196 } 2197 } 2198 if (nonew == 1) goto noinsert2; 2199 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2200 if (A->structure_only) { 2201 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2202 } else { 2203 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2204 } 2205 N = nrow++ - 1; 2206 high++; 2207 /* shift up all the later entries in this row */ 2208 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2209 rp[i] = col; 2210 if (!A->structure_only) { 2211 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2212 bap = ap + bs2 * i; 2213 if (roworiented) { 2214 for (ii = 0; ii < bs; ii++, value += stepval) { 2215 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2216 } 2217 } else { 2218 for (ii = 0; ii < bs; ii++, value += stepval) { 2219 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2220 } 2221 } 2222 } 2223 noinsert2:; 2224 low = i; 2225 } 2226 ailen[row] = nrow; 2227 } 2228 PetscFunctionReturn(0); 2229 } 2230 2231 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2232 { 2233 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2234 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2235 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2236 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2237 MatScalar *aa = a->a, *ap; 2238 PetscReal ratio = 0.6; 2239 2240 PetscFunctionBegin; 2241 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 2242 2243 if (m) rmax = ailen[0]; 2244 for (i = 1; i < mbs; i++) { 2245 /* move each row back by the amount of empty slots (fshift) before it*/ 2246 fshift += imax[i - 1] - ailen[i - 1]; 2247 rmax = PetscMax(rmax, ailen[i]); 2248 if (fshift) { 2249 ip = aj + ai[i]; 2250 ap = aa + bs2 * ai[i]; 2251 N = ailen[i]; 2252 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2253 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2254 } 2255 ai[i] = ai[i - 1] + ailen[i - 1]; 2256 } 2257 if (mbs) { 2258 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2259 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2260 } 2261 2262 /* reset ilen and imax for each row */ 2263 a->nonzerorowcnt = 0; 2264 if (A->structure_only) { 2265 PetscCall(PetscFree2(a->imax, a->ilen)); 2266 } else { /* !A->structure_only */ 2267 for (i = 0; i < mbs; i++) { 2268 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2269 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2270 } 2271 } 2272 a->nz = ai[mbs]; 2273 2274 /* diagonals may have moved, so kill the diagonal pointers */ 2275 a->idiagvalid = PETSC_FALSE; 2276 if (fshift && a->diag) { 2277 PetscCall(PetscFree(a->diag)); 2278 a->diag = NULL; 2279 } 2280 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2281 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2282 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2283 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2284 2285 A->info.mallocs += a->reallocs; 2286 a->reallocs = 0; 2287 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2288 a->rmax = rmax; 2289 2290 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 /* 2295 This function returns an array of flags which indicate the locations of contiguous 2296 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2297 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2298 Assume: sizes should be long enough to hold all the values. 2299 */ 2300 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2301 { 2302 PetscInt i, j, k, row; 2303 PetscBool flg; 2304 2305 PetscFunctionBegin; 2306 for (i = 0, j = 0; i < n; j++) { 2307 row = idx[i]; 2308 if (row % bs != 0) { /* Not the beginning of a block */ 2309 sizes[j] = 1; 2310 i++; 2311 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2312 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2313 i++; 2314 } else { /* Beginning of the block, so check if the complete block exists */ 2315 flg = PETSC_TRUE; 2316 for (k = 1; k < bs; k++) { 2317 if (row + k != idx[i + k]) { /* break in the block */ 2318 flg = PETSC_FALSE; 2319 break; 2320 } 2321 } 2322 if (flg) { /* No break in the bs */ 2323 sizes[j] = bs; 2324 i += bs; 2325 } else { 2326 sizes[j] = 1; 2327 i++; 2328 } 2329 } 2330 } 2331 *bs_max = j; 2332 PetscFunctionReturn(0); 2333 } 2334 2335 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2336 { 2337 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2338 PetscInt i, j, k, count, *rows; 2339 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2340 PetscScalar zero = 0.0; 2341 MatScalar *aa; 2342 const PetscScalar *xx; 2343 PetscScalar *bb; 2344 2345 PetscFunctionBegin; 2346 /* fix right hand side if needed */ 2347 if (x && b) { 2348 PetscCall(VecGetArrayRead(x, &xx)); 2349 PetscCall(VecGetArray(b, &bb)); 2350 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2351 PetscCall(VecRestoreArrayRead(x, &xx)); 2352 PetscCall(VecRestoreArray(b, &bb)); 2353 } 2354 2355 /* Make a copy of the IS and sort it */ 2356 /* allocate memory for rows,sizes */ 2357 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2358 2359 /* copy IS values to rows, and sort them */ 2360 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2361 PetscCall(PetscSortInt(is_n, rows)); 2362 2363 if (baij->keepnonzeropattern) { 2364 for (i = 0; i < is_n; i++) sizes[i] = 1; 2365 bs_max = is_n; 2366 } else { 2367 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2368 A->nonzerostate++; 2369 } 2370 2371 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2372 row = rows[j]; 2373 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2374 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2375 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2376 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2377 if (diag != (PetscScalar)0.0) { 2378 if (baij->ilen[row / bs] > 0) { 2379 baij->ilen[row / bs] = 1; 2380 baij->j[baij->i[row / bs]] = row / bs; 2381 2382 PetscCall(PetscArrayzero(aa, count * bs)); 2383 } 2384 /* Now insert all the diagonal values for this bs */ 2385 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2386 } else { /* (diag == 0.0) */ 2387 baij->ilen[row / bs] = 0; 2388 } /* end (diag == 0.0) */ 2389 } else { /* (sizes[i] != bs) */ 2390 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2391 for (k = 0; k < count; k++) { 2392 aa[0] = zero; 2393 aa += bs; 2394 } 2395 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2396 } 2397 } 2398 2399 PetscCall(PetscFree2(rows, sizes)); 2400 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2405 { 2406 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2407 PetscInt i, j, k, count; 2408 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2409 PetscScalar zero = 0.0; 2410 MatScalar *aa; 2411 const PetscScalar *xx; 2412 PetscScalar *bb; 2413 PetscBool *zeroed, vecs = PETSC_FALSE; 2414 2415 PetscFunctionBegin; 2416 /* fix right hand side if needed */ 2417 if (x && b) { 2418 PetscCall(VecGetArrayRead(x, &xx)); 2419 PetscCall(VecGetArray(b, &bb)); 2420 vecs = PETSC_TRUE; 2421 } 2422 2423 /* zero the columns */ 2424 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2425 for (i = 0; i < is_n; i++) { 2426 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2427 zeroed[is_idx[i]] = PETSC_TRUE; 2428 } 2429 for (i = 0; i < A->rmap->N; i++) { 2430 if (!zeroed[i]) { 2431 row = i / bs; 2432 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2433 for (k = 0; k < bs; k++) { 2434 col = bs * baij->j[j] + k; 2435 if (zeroed[col]) { 2436 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2437 if (vecs) bb[i] -= aa[0] * xx[col]; 2438 aa[0] = 0.0; 2439 } 2440 } 2441 } 2442 } else if (vecs) bb[i] = diag * xx[i]; 2443 } 2444 PetscCall(PetscFree(zeroed)); 2445 if (vecs) { 2446 PetscCall(VecRestoreArrayRead(x, &xx)); 2447 PetscCall(VecRestoreArray(b, &bb)); 2448 } 2449 2450 /* zero the rows */ 2451 for (i = 0; i < is_n; i++) { 2452 row = is_idx[i]; 2453 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2454 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2455 for (k = 0; k < count; k++) { 2456 aa[0] = zero; 2457 aa += bs; 2458 } 2459 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2460 } 2461 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2466 { 2467 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2468 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2469 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2470 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2471 PetscInt ridx, cidx, bs2 = a->bs2; 2472 PetscBool roworiented = a->roworiented; 2473 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2474 2475 PetscFunctionBegin; 2476 for (k = 0; k < m; k++) { /* loop over added rows */ 2477 row = im[k]; 2478 brow = row / bs; 2479 if (row < 0) continue; 2480 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2481 rp = aj + ai[brow]; 2482 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2483 rmax = imax[brow]; 2484 nrow = ailen[brow]; 2485 low = 0; 2486 high = nrow; 2487 for (l = 0; l < n; l++) { /* loop over added columns */ 2488 if (in[l] < 0) continue; 2489 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2490 col = in[l]; 2491 bcol = col / bs; 2492 ridx = row % bs; 2493 cidx = col % bs; 2494 if (!A->structure_only) { 2495 if (roworiented) { 2496 value = v[l + k * n]; 2497 } else { 2498 value = v[k + l * m]; 2499 } 2500 } 2501 if (col <= lastcol) low = 0; 2502 else high = nrow; 2503 lastcol = col; 2504 while (high - low > 7) { 2505 t = (low + high) / 2; 2506 if (rp[t] > bcol) high = t; 2507 else low = t; 2508 } 2509 for (i = low; i < high; i++) { 2510 if (rp[i] > bcol) break; 2511 if (rp[i] == bcol) { 2512 bap = ap + bs2 * i + bs * cidx + ridx; 2513 if (!A->structure_only) { 2514 if (is == ADD_VALUES) *bap += value; 2515 else *bap = value; 2516 } 2517 goto noinsert1; 2518 } 2519 } 2520 if (nonew == 1) goto noinsert1; 2521 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2522 if (A->structure_only) { 2523 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2524 } else { 2525 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2526 } 2527 N = nrow++ - 1; 2528 high++; 2529 /* shift up all the later entries in this row */ 2530 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2531 rp[i] = bcol; 2532 if (!A->structure_only) { 2533 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2534 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2535 ap[bs2 * i + bs * cidx + ridx] = value; 2536 } 2537 a->nz++; 2538 A->nonzerostate++; 2539 noinsert1:; 2540 low = i; 2541 } 2542 ailen[brow] = nrow; 2543 } 2544 PetscFunctionReturn(0); 2545 } 2546 2547 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2548 { 2549 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2550 Mat outA; 2551 PetscBool row_identity, col_identity; 2552 2553 PetscFunctionBegin; 2554 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2555 PetscCall(ISIdentity(row, &row_identity)); 2556 PetscCall(ISIdentity(col, &col_identity)); 2557 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2558 2559 outA = inA; 2560 inA->factortype = MAT_FACTOR_LU; 2561 PetscCall(PetscFree(inA->solvertype)); 2562 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2563 2564 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2565 2566 PetscCall(PetscObjectReference((PetscObject)row)); 2567 PetscCall(ISDestroy(&a->row)); 2568 a->row = row; 2569 PetscCall(PetscObjectReference((PetscObject)col)); 2570 PetscCall(ISDestroy(&a->col)); 2571 a->col = col; 2572 2573 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2574 PetscCall(ISDestroy(&a->icol)); 2575 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2576 2577 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2578 if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); } 2579 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2580 PetscFunctionReturn(0); 2581 } 2582 2583 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices) 2584 { 2585 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2586 PetscInt i, nz, mbs; 2587 2588 PetscFunctionBegin; 2589 nz = baij->maxnz; 2590 mbs = baij->mbs; 2591 for (i = 0; i < nz; i++) baij->j[i] = indices[i]; 2592 baij->nz = nz; 2593 for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i]; 2594 PetscFunctionReturn(0); 2595 } 2596 2597 /*@ 2598 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2599 2600 Input Parameters: 2601 + mat - the `MATSEQBAIJ` matrix 2602 - indices - the column indices 2603 2604 Level: advanced 2605 2606 Notes: 2607 This can be called if you have precomputed the nonzero structure of the 2608 matrix and want to provide it to the matrix object to improve the performance 2609 of the `MatSetValues()` operation. 2610 2611 You MUST have set the correct numbers of nonzeros per row in the call to 2612 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2613 2614 MUST be called before any calls to `MatSetValues()` 2615 2616 .seealso: `MATSEQBAIJ`, `MatSetValues()` 2617 @*/ 2618 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2619 { 2620 PetscFunctionBegin; 2621 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2622 PetscValidIntPointer(indices, 2); 2623 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2624 PetscFunctionReturn(0); 2625 } 2626 2627 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2628 { 2629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2630 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2631 PetscReal atmp; 2632 PetscScalar *x, zero = 0.0; 2633 MatScalar *aa; 2634 PetscInt ncols, brow, krow, kcol; 2635 2636 PetscFunctionBegin; 2637 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2638 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2639 bs = A->rmap->bs; 2640 aa = a->a; 2641 ai = a->i; 2642 aj = a->j; 2643 mbs = a->mbs; 2644 2645 PetscCall(VecSet(v, zero)); 2646 PetscCall(VecGetArray(v, &x)); 2647 PetscCall(VecGetLocalSize(v, &n)); 2648 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2649 for (i = 0; i < mbs; i++) { 2650 ncols = ai[1] - ai[0]; 2651 ai++; 2652 brow = bs * i; 2653 for (j = 0; j < ncols; j++) { 2654 for (kcol = 0; kcol < bs; kcol++) { 2655 for (krow = 0; krow < bs; krow++) { 2656 atmp = PetscAbsScalar(*aa); 2657 aa++; 2658 row = brow + krow; /* row index */ 2659 if (PetscAbsScalar(x[row]) < atmp) { 2660 x[row] = atmp; 2661 if (idx) idx[row] = bs * (*aj) + kcol; 2662 } 2663 } 2664 } 2665 aj++; 2666 } 2667 } 2668 PetscCall(VecRestoreArray(v, &x)); 2669 PetscFunctionReturn(0); 2670 } 2671 2672 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2673 { 2674 PetscFunctionBegin; 2675 /* If the two matrices have the same copy implementation, use fast copy. */ 2676 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2677 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2678 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2679 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2680 2681 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2682 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2683 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2684 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2685 } else { 2686 PetscCall(MatCopy_Basic(A, B, str)); 2687 } 2688 PetscFunctionReturn(0); 2689 } 2690 2691 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2692 { 2693 PetscFunctionBegin; 2694 PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL)); 2695 PetscFunctionReturn(0); 2696 } 2697 2698 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2699 { 2700 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 *array = a->a; 2704 PetscFunctionReturn(0); 2705 } 2706 2707 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2708 { 2709 PetscFunctionBegin; 2710 *array = NULL; 2711 PetscFunctionReturn(0); 2712 } 2713 2714 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2715 { 2716 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2717 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2718 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2719 2720 PetscFunctionBegin; 2721 /* Set the number of nonzeros in the new matrix */ 2722 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2723 PetscFunctionReturn(0); 2724 } 2725 2726 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2727 { 2728 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2729 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2730 PetscBLASInt one = 1; 2731 2732 PetscFunctionBegin; 2733 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2734 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2735 if (e) { 2736 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2737 if (e) { 2738 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2739 if (e) str = SAME_NONZERO_PATTERN; 2740 } 2741 } 2742 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2743 } 2744 if (str == SAME_NONZERO_PATTERN) { 2745 PetscScalar alpha = a; 2746 PetscBLASInt bnz; 2747 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2748 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2749 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2750 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2751 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2752 } else { 2753 Mat B; 2754 PetscInt *nnz; 2755 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2756 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2757 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2758 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2759 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2760 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2761 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2762 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2763 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2764 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2765 PetscCall(MatHeaderMerge(Y, &B)); 2766 PetscCall(PetscFree(nnz)); 2767 } 2768 PetscFunctionReturn(0); 2769 } 2770 2771 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2772 { 2773 #if defined(PETSC_USE_COMPLEX) 2774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2775 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2776 MatScalar *aa = a->a; 2777 2778 PetscFunctionBegin; 2779 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2780 #else 2781 PetscFunctionBegin; 2782 #endif 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2787 { 2788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2789 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2790 MatScalar *aa = a->a; 2791 2792 PetscFunctionBegin; 2793 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2798 { 2799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2800 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2801 MatScalar *aa = a->a; 2802 2803 PetscFunctionBegin; 2804 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2805 PetscFunctionReturn(0); 2806 } 2807 2808 /* 2809 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2810 */ 2811 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2812 { 2813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2814 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2815 PetscInt nz = a->i[m], row, *jj, mr, col; 2816 2817 PetscFunctionBegin; 2818 *nn = n; 2819 if (!ia) PetscFunctionReturn(0); 2820 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2821 PetscCall(PetscCalloc1(n, &collengths)); 2822 PetscCall(PetscMalloc1(n + 1, &cia)); 2823 PetscCall(PetscMalloc1(nz, &cja)); 2824 jj = a->j; 2825 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2826 cia[0] = oshift; 2827 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2828 PetscCall(PetscArrayzero(collengths, n)); 2829 jj = a->j; 2830 for (row = 0; row < m; row++) { 2831 mr = a->i[row + 1] - a->i[row]; 2832 for (i = 0; i < mr; i++) { 2833 col = *jj++; 2834 2835 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2836 } 2837 } 2838 PetscCall(PetscFree(collengths)); 2839 *ia = cia; 2840 *ja = cja; 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2845 { 2846 PetscFunctionBegin; 2847 if (!ia) PetscFunctionReturn(0); 2848 PetscCall(PetscFree(*ia)); 2849 PetscCall(PetscFree(*ja)); 2850 PetscFunctionReturn(0); 2851 } 2852 2853 /* 2854 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2855 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2856 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2857 */ 2858 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2859 { 2860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2861 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2862 PetscInt nz = a->i[m], row, *jj, mr, col; 2863 PetscInt *cspidx; 2864 2865 PetscFunctionBegin; 2866 *nn = n; 2867 if (!ia) PetscFunctionReturn(0); 2868 2869 PetscCall(PetscCalloc1(n, &collengths)); 2870 PetscCall(PetscMalloc1(n + 1, &cia)); 2871 PetscCall(PetscMalloc1(nz, &cja)); 2872 PetscCall(PetscMalloc1(nz, &cspidx)); 2873 jj = a->j; 2874 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2875 cia[0] = oshift; 2876 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2877 PetscCall(PetscArrayzero(collengths, n)); 2878 jj = a->j; 2879 for (row = 0; row < m; row++) { 2880 mr = a->i[row + 1] - a->i[row]; 2881 for (i = 0; i < mr; i++) { 2882 col = *jj++; 2883 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2884 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2885 } 2886 } 2887 PetscCall(PetscFree(collengths)); 2888 *ia = cia; 2889 *ja = cja; 2890 *spidx = cspidx; 2891 PetscFunctionReturn(0); 2892 } 2893 2894 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2895 { 2896 PetscFunctionBegin; 2897 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2898 PetscCall(PetscFree(*spidx)); 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2903 { 2904 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2905 2906 PetscFunctionBegin; 2907 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2908 PetscCall(MatShift_Basic(Y, a)); 2909 PetscFunctionReturn(0); 2910 } 2911 2912 /* -------------------------------------------------------------------*/ 2913 static struct _MatOps MatOps_Values = { 2914 MatSetValues_SeqBAIJ, 2915 MatGetRow_SeqBAIJ, 2916 MatRestoreRow_SeqBAIJ, 2917 MatMult_SeqBAIJ_N, 2918 /* 4*/ MatMultAdd_SeqBAIJ_N, 2919 MatMultTranspose_SeqBAIJ, 2920 MatMultTransposeAdd_SeqBAIJ, 2921 NULL, 2922 NULL, 2923 NULL, 2924 /* 10*/ NULL, 2925 MatLUFactor_SeqBAIJ, 2926 NULL, 2927 NULL, 2928 MatTranspose_SeqBAIJ, 2929 /* 15*/ MatGetInfo_SeqBAIJ, 2930 MatEqual_SeqBAIJ, 2931 MatGetDiagonal_SeqBAIJ, 2932 MatDiagonalScale_SeqBAIJ, 2933 MatNorm_SeqBAIJ, 2934 /* 20*/ NULL, 2935 MatAssemblyEnd_SeqBAIJ, 2936 MatSetOption_SeqBAIJ, 2937 MatZeroEntries_SeqBAIJ, 2938 /* 24*/ MatZeroRows_SeqBAIJ, 2939 NULL, 2940 NULL, 2941 NULL, 2942 NULL, 2943 /* 29*/ MatSetUp_SeqBAIJ, 2944 NULL, 2945 NULL, 2946 NULL, 2947 NULL, 2948 /* 34*/ MatDuplicate_SeqBAIJ, 2949 NULL, 2950 NULL, 2951 MatILUFactor_SeqBAIJ, 2952 NULL, 2953 /* 39*/ MatAXPY_SeqBAIJ, 2954 MatCreateSubMatrices_SeqBAIJ, 2955 MatIncreaseOverlap_SeqBAIJ, 2956 MatGetValues_SeqBAIJ, 2957 MatCopy_SeqBAIJ, 2958 /* 44*/ NULL, 2959 MatScale_SeqBAIJ, 2960 MatShift_SeqBAIJ, 2961 NULL, 2962 MatZeroRowsColumns_SeqBAIJ, 2963 /* 49*/ NULL, 2964 MatGetRowIJ_SeqBAIJ, 2965 MatRestoreRowIJ_SeqBAIJ, 2966 MatGetColumnIJ_SeqBAIJ, 2967 MatRestoreColumnIJ_SeqBAIJ, 2968 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2969 NULL, 2970 NULL, 2971 NULL, 2972 MatSetValuesBlocked_SeqBAIJ, 2973 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2974 MatDestroy_SeqBAIJ, 2975 MatView_SeqBAIJ, 2976 NULL, 2977 NULL, 2978 /* 64*/ NULL, 2979 NULL, 2980 NULL, 2981 NULL, 2982 NULL, 2983 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2984 NULL, 2985 MatConvert_Basic, 2986 NULL, 2987 NULL, 2988 /* 74*/ NULL, 2989 MatFDColoringApply_BAIJ, 2990 NULL, 2991 NULL, 2992 NULL, 2993 /* 79*/ NULL, 2994 NULL, 2995 NULL, 2996 NULL, 2997 MatLoad_SeqBAIJ, 2998 /* 84*/ NULL, 2999 NULL, 3000 NULL, 3001 NULL, 3002 NULL, 3003 /* 89*/ NULL, 3004 NULL, 3005 NULL, 3006 NULL, 3007 NULL, 3008 /* 94*/ NULL, 3009 NULL, 3010 NULL, 3011 NULL, 3012 NULL, 3013 /* 99*/ NULL, 3014 NULL, 3015 NULL, 3016 MatConjugate_SeqBAIJ, 3017 NULL, 3018 /*104*/ NULL, 3019 MatRealPart_SeqBAIJ, 3020 MatImaginaryPart_SeqBAIJ, 3021 NULL, 3022 NULL, 3023 /*109*/ NULL, 3024 NULL, 3025 NULL, 3026 NULL, 3027 MatMissingDiagonal_SeqBAIJ, 3028 /*114*/ NULL, 3029 NULL, 3030 NULL, 3031 NULL, 3032 NULL, 3033 /*119*/ NULL, 3034 NULL, 3035 MatMultHermitianTranspose_SeqBAIJ, 3036 MatMultHermitianTransposeAdd_SeqBAIJ, 3037 NULL, 3038 /*124*/ NULL, 3039 MatGetColumnReductions_SeqBAIJ, 3040 MatInvertBlockDiagonal_SeqBAIJ, 3041 NULL, 3042 NULL, 3043 /*129*/ NULL, 3044 NULL, 3045 NULL, 3046 NULL, 3047 NULL, 3048 /*134*/ NULL, 3049 NULL, 3050 NULL, 3051 NULL, 3052 NULL, 3053 /*139*/ MatSetBlockSizes_Default, 3054 NULL, 3055 NULL, 3056 MatFDColoringSetUp_SeqXAIJ, 3057 NULL, 3058 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3059 MatDestroySubMatrices_SeqBAIJ, 3060 NULL, 3061 NULL, 3062 NULL, 3063 NULL, 3064 /*150*/ NULL, 3065 }; 3066 3067 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3068 { 3069 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3070 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3071 3072 PetscFunctionBegin; 3073 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3074 3075 /* allocate space for values if not already there */ 3076 if (!aij->saved_values) { PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); } 3077 3078 /* copy values over */ 3079 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3080 PetscFunctionReturn(0); 3081 } 3082 3083 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3084 { 3085 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3086 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3087 3088 PetscFunctionBegin; 3089 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3090 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3091 3092 /* copy values over */ 3093 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3094 PetscFunctionReturn(0); 3095 } 3096 3097 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3098 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3099 3100 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3101 { 3102 Mat_SeqBAIJ *b; 3103 PetscInt i, mbs, nbs, bs2; 3104 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3105 3106 PetscFunctionBegin; 3107 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3108 if (nz == MAT_SKIP_ALLOCATION) { 3109 skipallocation = PETSC_TRUE; 3110 nz = 0; 3111 } 3112 3113 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3114 PetscCall(PetscLayoutSetUp(B->rmap)); 3115 PetscCall(PetscLayoutSetUp(B->cmap)); 3116 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3117 3118 B->preallocated = PETSC_TRUE; 3119 3120 mbs = B->rmap->n / bs; 3121 nbs = B->cmap->n / bs; 3122 bs2 = bs * bs; 3123 3124 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3125 3126 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3127 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3128 if (nnz) { 3129 for (i = 0; i < mbs; i++) { 3130 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3131 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3132 } 3133 } 3134 3135 b = (Mat_SeqBAIJ *)B->data; 3136 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3137 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3138 PetscOptionsEnd(); 3139 3140 if (!flg) { 3141 switch (bs) { 3142 case 1: 3143 B->ops->mult = MatMult_SeqBAIJ_1; 3144 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3145 break; 3146 case 2: 3147 B->ops->mult = MatMult_SeqBAIJ_2; 3148 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3149 break; 3150 case 3: 3151 B->ops->mult = MatMult_SeqBAIJ_3; 3152 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3153 break; 3154 case 4: 3155 B->ops->mult = MatMult_SeqBAIJ_4; 3156 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3157 break; 3158 case 5: 3159 B->ops->mult = MatMult_SeqBAIJ_5; 3160 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3161 break; 3162 case 6: 3163 B->ops->mult = MatMult_SeqBAIJ_6; 3164 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3165 break; 3166 case 7: 3167 B->ops->mult = MatMult_SeqBAIJ_7; 3168 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3169 break; 3170 case 9: { 3171 PetscInt version = 1; 3172 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3173 switch (version) { 3174 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3175 case 1: 3176 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3177 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3178 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3179 break; 3180 #endif 3181 default: 3182 B->ops->mult = MatMult_SeqBAIJ_N; 3183 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3184 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3185 break; 3186 } 3187 break; 3188 } 3189 case 11: 3190 B->ops->mult = MatMult_SeqBAIJ_11; 3191 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3192 break; 3193 case 12: { 3194 PetscInt version = 1; 3195 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3196 switch (version) { 3197 case 1: 3198 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3199 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3200 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3201 break; 3202 case 2: 3203 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3204 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3205 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3206 break; 3207 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3208 case 3: 3209 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3210 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3211 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3212 break; 3213 #endif 3214 default: 3215 B->ops->mult = MatMult_SeqBAIJ_N; 3216 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3217 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3218 break; 3219 } 3220 break; 3221 } 3222 case 15: { 3223 PetscInt version = 1; 3224 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3225 switch (version) { 3226 case 1: 3227 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3228 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3229 break; 3230 case 2: 3231 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3232 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3233 break; 3234 case 3: 3235 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3236 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3237 break; 3238 case 4: 3239 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3240 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3241 break; 3242 default: 3243 B->ops->mult = MatMult_SeqBAIJ_N; 3244 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3245 break; 3246 } 3247 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3248 break; 3249 } 3250 default: 3251 B->ops->mult = MatMult_SeqBAIJ_N; 3252 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3253 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3254 break; 3255 } 3256 } 3257 B->ops->sor = MatSOR_SeqBAIJ; 3258 b->mbs = mbs; 3259 b->nbs = nbs; 3260 if (!skipallocation) { 3261 if (!b->imax) { 3262 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3263 3264 b->free_imax_ilen = PETSC_TRUE; 3265 } 3266 /* b->ilen will count nonzeros in each block row so far. */ 3267 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3268 if (!nnz) { 3269 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3270 else if (nz < 0) nz = 1; 3271 nz = PetscMin(nz, nbs); 3272 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3273 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3274 } else { 3275 PetscInt64 nz64 = 0; 3276 for (i = 0; i < mbs; i++) { 3277 b->imax[i] = nnz[i]; 3278 nz64 += nnz[i]; 3279 } 3280 PetscCall(PetscIntCast(nz64, &nz)); 3281 } 3282 3283 /* allocate the matrix space */ 3284 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3285 if (B->structure_only) { 3286 PetscCall(PetscMalloc1(nz, &b->j)); 3287 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3288 } else { 3289 PetscInt nzbs2 = 0; 3290 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3291 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3292 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3293 } 3294 PetscCall(PetscArrayzero(b->j, nz)); 3295 3296 if (B->structure_only) { 3297 b->singlemalloc = PETSC_FALSE; 3298 b->free_a = PETSC_FALSE; 3299 } else { 3300 b->singlemalloc = PETSC_TRUE; 3301 b->free_a = PETSC_TRUE; 3302 } 3303 b->free_ij = PETSC_TRUE; 3304 3305 b->i[0] = 0; 3306 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3307 3308 } else { 3309 b->free_a = PETSC_FALSE; 3310 b->free_ij = PETSC_FALSE; 3311 } 3312 3313 b->bs2 = bs2; 3314 b->mbs = mbs; 3315 b->nz = 0; 3316 b->maxnz = nz; 3317 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3318 B->was_assembled = PETSC_FALSE; 3319 B->assembled = PETSC_FALSE; 3320 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3321 PetscFunctionReturn(0); 3322 } 3323 3324 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3325 { 3326 PetscInt i, m, nz, nz_max = 0, *nnz; 3327 PetscScalar *values = NULL; 3328 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3329 3330 PetscFunctionBegin; 3331 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3332 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3333 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3334 PetscCall(PetscLayoutSetUp(B->rmap)); 3335 PetscCall(PetscLayoutSetUp(B->cmap)); 3336 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3337 m = B->rmap->n / bs; 3338 3339 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3340 PetscCall(PetscMalloc1(m + 1, &nnz)); 3341 for (i = 0; i < m; i++) { 3342 nz = ii[i + 1] - ii[i]; 3343 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3344 nz_max = PetscMax(nz_max, nz); 3345 nnz[i] = nz; 3346 } 3347 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3348 PetscCall(PetscFree(nnz)); 3349 3350 values = (PetscScalar *)V; 3351 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3352 for (i = 0; i < m; i++) { 3353 PetscInt ncols = ii[i + 1] - ii[i]; 3354 const PetscInt *icols = jj + ii[i]; 3355 if (bs == 1 || !roworiented) { 3356 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3357 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3358 } else { 3359 PetscInt j; 3360 for (j = 0; j < ncols; j++) { 3361 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3362 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3363 } 3364 } 3365 } 3366 if (!V) PetscCall(PetscFree(values)); 3367 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3368 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3369 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3370 PetscFunctionReturn(0); 3371 } 3372 3373 /*@C 3374 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3375 3376 Not Collective 3377 3378 Input Parameter: 3379 . mat - a `MATSEQBAIJ` matrix 3380 3381 Output Parameter: 3382 . array - pointer to the data 3383 3384 Level: intermediate 3385 3386 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3387 @*/ 3388 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3389 { 3390 PetscFunctionBegin; 3391 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3392 PetscFunctionReturn(0); 3393 } 3394 3395 /*@C 3396 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3397 3398 Not Collective 3399 3400 Input Parameters: 3401 + mat - a `MATSEQBAIJ` matrix 3402 - array - pointer to the data 3403 3404 Level: intermediate 3405 3406 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3407 @*/ 3408 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3409 { 3410 PetscFunctionBegin; 3411 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3412 PetscFunctionReturn(0); 3413 } 3414 3415 /*MC 3416 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3417 block sparse compressed row format. 3418 3419 Options Database Keys: 3420 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3421 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3422 3423 Level: beginner 3424 3425 Notes: 3426 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3427 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3428 3429 Run with -info to see what version of the matrix-vector product is being used 3430 3431 .seealso: `MatCreateSeqBAIJ()` 3432 M*/ 3433 3434 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3435 3436 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3437 { 3438 PetscMPIInt size; 3439 Mat_SeqBAIJ *b; 3440 3441 PetscFunctionBegin; 3442 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3443 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3444 3445 PetscCall(PetscNew(&b)); 3446 B->data = (void *)b; 3447 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3448 3449 b->row = NULL; 3450 b->col = NULL; 3451 b->icol = NULL; 3452 b->reallocs = 0; 3453 b->saved_values = NULL; 3454 3455 b->roworiented = PETSC_TRUE; 3456 b->nonew = 0; 3457 b->diag = NULL; 3458 B->spptr = NULL; 3459 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3460 b->keepnonzeropattern = PETSC_FALSE; 3461 3462 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3463 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3464 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3465 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3466 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3467 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3468 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3469 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3470 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3471 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3472 #if defined(PETSC_HAVE_HYPRE) 3473 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3474 #endif 3475 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3476 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3477 PetscFunctionReturn(0); 3478 } 3479 3480 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3481 { 3482 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3483 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3484 3485 PetscFunctionBegin; 3486 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3487 3488 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3489 c->imax = a->imax; 3490 c->ilen = a->ilen; 3491 c->free_imax_ilen = PETSC_FALSE; 3492 } else { 3493 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3494 for (i = 0; i < mbs; i++) { 3495 c->imax[i] = a->imax[i]; 3496 c->ilen[i] = a->ilen[i]; 3497 } 3498 c->free_imax_ilen = PETSC_TRUE; 3499 } 3500 3501 /* allocate the matrix space */ 3502 if (mallocmatspace) { 3503 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3504 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3505 3506 c->i = a->i; 3507 c->j = a->j; 3508 c->singlemalloc = PETSC_FALSE; 3509 c->free_a = PETSC_TRUE; 3510 c->free_ij = PETSC_FALSE; 3511 c->parent = A; 3512 C->preallocated = PETSC_TRUE; 3513 C->assembled = PETSC_TRUE; 3514 3515 PetscCall(PetscObjectReference((PetscObject)A)); 3516 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3517 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3518 } else { 3519 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3520 3521 c->singlemalloc = PETSC_TRUE; 3522 c->free_a = PETSC_TRUE; 3523 c->free_ij = PETSC_TRUE; 3524 3525 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3526 if (mbs > 0) { 3527 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3528 if (cpvalues == MAT_COPY_VALUES) { 3529 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3530 } else { 3531 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3532 } 3533 } 3534 C->preallocated = PETSC_TRUE; 3535 C->assembled = PETSC_TRUE; 3536 } 3537 } 3538 3539 c->roworiented = a->roworiented; 3540 c->nonew = a->nonew; 3541 3542 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3543 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3544 3545 c->bs2 = a->bs2; 3546 c->mbs = a->mbs; 3547 c->nbs = a->nbs; 3548 3549 if (a->diag) { 3550 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3551 c->diag = a->diag; 3552 c->free_diag = PETSC_FALSE; 3553 } else { 3554 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3555 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3556 c->free_diag = PETSC_TRUE; 3557 } 3558 } else c->diag = NULL; 3559 3560 c->nz = a->nz; 3561 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3562 c->solve_work = NULL; 3563 c->mult_work = NULL; 3564 c->sor_workt = NULL; 3565 c->sor_work = NULL; 3566 3567 c->compressedrow.use = a->compressedrow.use; 3568 c->compressedrow.nrows = a->compressedrow.nrows; 3569 if (a->compressedrow.use) { 3570 i = a->compressedrow.nrows; 3571 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3572 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3573 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3574 } else { 3575 c->compressedrow.use = PETSC_FALSE; 3576 c->compressedrow.i = NULL; 3577 c->compressedrow.rindex = NULL; 3578 } 3579 C->nonzerostate = A->nonzerostate; 3580 3581 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3582 PetscFunctionReturn(0); 3583 } 3584 3585 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3586 { 3587 PetscFunctionBegin; 3588 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3589 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3590 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3591 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3592 PetscFunctionReturn(0); 3593 } 3594 3595 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3596 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3597 { 3598 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3599 PetscInt *rowidxs, *colidxs; 3600 PetscScalar *matvals; 3601 3602 PetscFunctionBegin; 3603 PetscCall(PetscViewerSetUp(viewer)); 3604 3605 /* read matrix header */ 3606 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3607 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3608 M = header[1]; 3609 N = header[2]; 3610 nz = header[3]; 3611 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3612 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3613 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3614 3615 /* set block sizes from the viewer's .info file */ 3616 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3617 /* set local and global sizes if not set already */ 3618 if (mat->rmap->n < 0) mat->rmap->n = M; 3619 if (mat->cmap->n < 0) mat->cmap->n = N; 3620 if (mat->rmap->N < 0) mat->rmap->N = M; 3621 if (mat->cmap->N < 0) mat->cmap->N = N; 3622 PetscCall(PetscLayoutSetUp(mat->rmap)); 3623 PetscCall(PetscLayoutSetUp(mat->cmap)); 3624 3625 /* check if the matrix sizes are correct */ 3626 PetscCall(MatGetSize(mat, &rows, &cols)); 3627 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3628 PetscCall(MatGetBlockSize(mat, &bs)); 3629 PetscCall(MatGetLocalSize(mat, &m, &n)); 3630 mbs = m / bs; 3631 nbs = n / bs; 3632 3633 /* read in row lengths, column indices and nonzero values */ 3634 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3635 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3636 rowidxs[0] = 0; 3637 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3638 sum = rowidxs[m]; 3639 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3640 3641 /* read in column indices and nonzero values */ 3642 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3643 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3644 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3645 3646 { /* preallocate matrix storage */ 3647 PetscBT bt; /* helper bit set to count nonzeros */ 3648 PetscInt *nnz; 3649 PetscBool sbaij; 3650 3651 PetscCall(PetscBTCreate(nbs, &bt)); 3652 PetscCall(PetscCalloc1(mbs, &nnz)); 3653 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3654 for (i = 0; i < mbs; i++) { 3655 PetscCall(PetscBTMemzero(nbs, bt)); 3656 for (k = 0; k < bs; k++) { 3657 PetscInt row = bs * i + k; 3658 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3659 PetscInt col = colidxs[j]; 3660 if (!sbaij || col >= row) 3661 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3662 } 3663 } 3664 } 3665 PetscCall(PetscBTDestroy(&bt)); 3666 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3667 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3668 PetscCall(PetscFree(nnz)); 3669 } 3670 3671 /* store matrix values */ 3672 for (i = 0; i < m; i++) { 3673 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3674 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3675 } 3676 3677 PetscCall(PetscFree(rowidxs)); 3678 PetscCall(PetscFree2(colidxs, matvals)); 3679 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3680 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3681 PetscFunctionReturn(0); 3682 } 3683 3684 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3685 { 3686 PetscBool isbinary; 3687 3688 PetscFunctionBegin; 3689 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3690 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3691 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3692 PetscFunctionReturn(0); 3693 } 3694 3695 /*@C 3696 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3697 compressed row) format. For good matrix assembly performance the 3698 user should preallocate the matrix storage by setting the parameter nz 3699 (or the array nnz). By setting these parameters accurately, performance 3700 during matrix assembly can be increased by more than a factor of 50. 3701 3702 Collective 3703 3704 Input Parameters: 3705 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3706 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3707 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3708 . m - number of rows 3709 . n - number of columns 3710 . nz - number of nonzero blocks per block row (same for all rows) 3711 - nnz - array containing the number of nonzero blocks in the various block rows 3712 (possibly different for each block row) or NULL 3713 3714 Output Parameter: 3715 . A - the matrix 3716 3717 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3718 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3719 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3720 3721 Options Database Keys: 3722 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3723 - -mat_block_size - size of the blocks to use 3724 3725 Level: intermediate 3726 3727 Notes: 3728 The number of rows and columns must be divisible by blocksize. 3729 3730 If the nnz parameter is given then the nz parameter is ignored 3731 3732 A nonzero block is any block that as 1 or more nonzeros in it 3733 3734 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3735 storage. That is, the stored row and column indices can begin at 3736 either one (as in Fortran) or zero. See the users' manual for details. 3737 3738 Specify the preallocated storage with either nz or nnz (not both). 3739 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3740 allocation. See [Sparse Matrices](sec_matsparse) for details. 3741 matrices. 3742 3743 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3744 @*/ 3745 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3746 { 3747 PetscFunctionBegin; 3748 PetscCall(MatCreate(comm, A)); 3749 PetscCall(MatSetSizes(*A, m, n, m, n)); 3750 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3751 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3752 PetscFunctionReturn(0); 3753 } 3754 3755 /*@C 3756 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3757 per row in the matrix. For good matrix assembly performance the 3758 user should preallocate the matrix storage by setting the parameter nz 3759 (or the array nnz). By setting these parameters accurately, performance 3760 during matrix assembly can be increased by more than a factor of 50. 3761 3762 Collective 3763 3764 Input Parameters: 3765 + B - the matrix 3766 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3767 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3768 . nz - number of block nonzeros per block row (same for all rows) 3769 - nnz - array containing the number of block nonzeros in the various block rows 3770 (possibly different for each block row) or NULL 3771 3772 Options Database Keys: 3773 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3774 - -mat_block_size - size of the blocks to use 3775 3776 Level: intermediate 3777 3778 Notes: 3779 If the nnz parameter is given then the nz parameter is ignored 3780 3781 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3782 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3783 You can also run with the option -info and look for messages with the string 3784 malloc in them to see if additional memory allocation was needed. 3785 3786 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3787 storage. That is, the stored row and column indices can begin at 3788 either one (as in Fortran) or zero. See the users' manual for details. 3789 3790 Specify the preallocated storage with either nz or nnz (not both). 3791 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3792 allocation. See [Sparse Matrices](sec_matsparse) for details. 3793 3794 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3795 @*/ 3796 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3797 { 3798 PetscFunctionBegin; 3799 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3800 PetscValidType(B, 1); 3801 PetscValidLogicalCollectiveInt(B, bs, 2); 3802 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3803 PetscFunctionReturn(0); 3804 } 3805 3806 /*@C 3807 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3808 3809 Collective 3810 3811 Input Parameters: 3812 + B - the matrix 3813 . i - the indices into j for the start of each local row (starts with zero) 3814 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3815 - v - optional values in the matrix 3816 3817 Level: advanced 3818 3819 Notes: 3820 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3821 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3822 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3823 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3824 block column and the second index is over columns within a block. 3825 3826 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3827 3828 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3829 @*/ 3830 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3831 { 3832 PetscFunctionBegin; 3833 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3834 PetscValidType(B, 1); 3835 PetscValidLogicalCollectiveInt(B, bs, 2); 3836 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3837 PetscFunctionReturn(0); 3838 } 3839 3840 /*@ 3841 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3842 3843 Collective 3844 3845 Input Parameters: 3846 + comm - must be an MPI communicator of size 1 3847 . bs - size of block 3848 . m - number of rows 3849 . n - number of columns 3850 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3851 . j - column indices 3852 - a - matrix values 3853 3854 Output Parameter: 3855 . mat - the matrix 3856 3857 Level: advanced 3858 3859 Notes: 3860 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3861 once the matrix is destroyed 3862 3863 You cannot set new nonzero locations into this matrix, that will generate an error. 3864 3865 The i and j indices are 0 based 3866 3867 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3868 3869 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3870 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3871 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3872 with column-major ordering within blocks. 3873 3874 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3875 @*/ 3876 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3877 { 3878 PetscInt ii; 3879 Mat_SeqBAIJ *baij; 3880 3881 PetscFunctionBegin; 3882 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3883 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3884 3885 PetscCall(MatCreate(comm, mat)); 3886 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3887 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3888 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3889 baij = (Mat_SeqBAIJ *)(*mat)->data; 3890 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3891 3892 baij->i = i; 3893 baij->j = j; 3894 baij->a = a; 3895 3896 baij->singlemalloc = PETSC_FALSE; 3897 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3898 baij->free_a = PETSC_FALSE; 3899 baij->free_ij = PETSC_FALSE; 3900 3901 for (ii = 0; ii < m; ii++) { 3902 baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii]; 3903 PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]); 3904 } 3905 if (PetscDefined(USE_DEBUG)) { 3906 for (ii = 0; ii < baij->i[m]; ii++) { 3907 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3908 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3909 } 3910 } 3911 3912 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3913 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3914 PetscFunctionReturn(0); 3915 } 3916 3917 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3918 { 3919 PetscFunctionBegin; 3920 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3921 PetscFunctionReturn(0); 3922 } 3923