1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 12 #define TYPE BAIJ 13 #define TYPE_BS 14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 15 #undef TYPE_BS 16 #define TYPE_BS _BS 17 #define TYPE_BS_ON 18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 19 #undef TYPE_BS 20 #include "../src/mat/impls/aij/seq/seqhashmat.h" 21 #undef TYPE 22 #undef TYPE_BS_ON 23 24 #if defined(PETSC_HAVE_HYPRE) 25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 26 #endif 27 28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30 #endif 31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32 33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34 { 35 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 36 PetscInt m, n, ib, jb, bs = A->rmap->bs; 37 MatScalar *a_val = a_aij->a; 38 39 PetscFunctionBegin; 40 PetscCall(MatGetSize(A, &m, &n)); 41 PetscCall(PetscArrayzero(reductions, n)); 42 if (type == NORM_2) { 43 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 44 for (jb = 0; jb < bs; jb++) { 45 for (ib = 0; ib < bs; ib++) { 46 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 47 a_val++; 48 } 49 } 50 } 51 } else if (type == NORM_1) { 52 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 53 for (jb = 0; jb < bs; jb++) { 54 for (ib = 0; ib < bs; ib++) { 55 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 56 a_val++; 57 } 58 } 59 } 60 } else if (type == NORM_INFINITY) { 61 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 62 for (jb = 0; jb < bs; jb++) { 63 for (ib = 0; ib < bs; ib++) { 64 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 66 a_val++; 67 } 68 } 69 } 70 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 71 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72 for (jb = 0; jb < bs; jb++) { 73 for (ib = 0; ib < bs; ib++) { 74 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75 a_val++; 76 } 77 } 78 } 79 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81 for (jb = 0; jb < bs; jb++) { 82 for (ib = 0; ib < bs; ib++) { 83 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84 a_val++; 85 } 86 } 87 } 88 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 89 if (type == NORM_2) { 90 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 93 } 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 static PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98 { 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 101 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 102 PetscReal shift = 0.0; 103 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104 105 PetscFunctionBegin; 106 allowzeropivot = PetscNot(A->erroriffailure); 107 108 if (a->idiagvalid) { 109 if (values) *values = a->idiag; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113 diag_offset = a->diag; 114 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115 diag = a->idiag; 116 if (values) *values = a->idiag; 117 /* factor and invert each block */ 118 switch (bs) { 119 case 1: 120 for (i = 0; i < mbs; i++) { 121 odiag = v + 1 * diag_offset[i]; 122 diag[0] = odiag[0]; 123 124 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125 if (allowzeropivot) { 126 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 128 A->factorerror_zeropivot_row = i; 129 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * diag_offset[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * diag_offset[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * diag_offset[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * diag_offset[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * diag_offset[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * diag_offset[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 static PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 /* 1414 Checks for missing diagonals 1415 */ 1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1419 PetscInt *diag, *ii = a->i, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1423 *missing = PETSC_FALSE; 1424 if (A->rmap->n > 0 && !ii) { 1425 *missing = PETSC_TRUE; 1426 if (d) *d = 0; 1427 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1428 } else { 1429 PetscInt n; 1430 n = PetscMin(a->mbs, a->nbs); 1431 diag = a->diag; 1432 for (i = 0; i < n; i++) { 1433 if (diag[i] >= ii[i + 1]) { 1434 *missing = PETSC_TRUE; 1435 if (d) *d = i; 1436 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437 break; 1438 } 1439 } 1440 } 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445 { 1446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1447 PetscInt i, j, m = a->mbs; 1448 1449 PetscFunctionBegin; 1450 if (!a->diag) { 1451 PetscCall(PetscMalloc1(m, &a->diag)); 1452 a->free_diag = PETSC_TRUE; 1453 } 1454 for (i = 0; i < m; i++) { 1455 a->diag[i] = a->i[i + 1]; 1456 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457 if (a->j[j] == i) { 1458 a->diag[i] = j; 1459 break; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1469 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1470 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1471 1472 PetscFunctionBegin; 1473 *nn = n; 1474 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1475 if (symmetric) { 1476 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477 nz = tia[n]; 1478 } else { 1479 tia = a->i; 1480 tja = a->j; 1481 } 1482 1483 if (!blockcompressed && bs > 1) { 1484 (*nn) *= bs; 1485 /* malloc & create the natural set of indices */ 1486 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1487 if (n) { 1488 (*ia)[0] = oshift; 1489 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1490 } 1491 1492 for (i = 1; i < n; i++) { 1493 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1495 } 1496 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497 1498 if (inja) { 1499 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1500 cnt = 0; 1501 for (i = 0; i < n; i++) { 1502 for (j = 0; j < bs; j++) { 1503 for (k = tia[i]; k < tia[i + 1]; k++) { 1504 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1505 } 1506 } 1507 } 1508 } 1509 1510 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1511 PetscCall(PetscFree(tia)); 1512 PetscCall(PetscFree(tja)); 1513 } 1514 } else if (oshift == 1) { 1515 if (symmetric) { 1516 nz = tia[A->rmap->n / bs]; 1517 /* add 1 to i and j indices */ 1518 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519 *ia = tia; 1520 if (ja) { 1521 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522 *ja = tja; 1523 } 1524 } else { 1525 nz = a->i[A->rmap->n / bs]; 1526 /* malloc space and add 1 to i and j indices */ 1527 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529 if (ja) { 1530 PetscCall(PetscMalloc1(nz, ja)); 1531 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532 } 1533 } 1534 } else { 1535 *ia = tia; 1536 if (ja) *ja = tja; 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542 { 1543 PetscFunctionBegin; 1544 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1546 PetscCall(PetscFree(*ia)); 1547 if (ja) PetscCall(PetscFree(*ja)); 1548 } 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1555 1556 PetscFunctionBegin; 1557 if (A->hash_active) { 1558 PetscInt bs; 1559 A->ops[0] = a->cops; 1560 PetscCall(PetscHMapIJVDestroy(&a->ht)); 1561 PetscCall(MatGetBlockSize(A, &bs)); 1562 if (bs > 1) PetscCall(PetscHSetIJDestroy(&a->bht)); 1563 PetscCall(PetscFree(a->dnz)); 1564 PetscCall(PetscFree(a->bdnz)); 1565 A->hash_active = PETSC_FALSE; 1566 } 1567 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1568 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1569 PetscCall(ISDestroy(&a->row)); 1570 PetscCall(ISDestroy(&a->col)); 1571 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1572 PetscCall(PetscFree(a->idiag)); 1573 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1574 PetscCall(PetscFree(a->solve_work)); 1575 PetscCall(PetscFree(a->mult_work)); 1576 PetscCall(PetscFree(a->sor_workt)); 1577 PetscCall(PetscFree(a->sor_work)); 1578 PetscCall(ISDestroy(&a->icol)); 1579 PetscCall(PetscFree(a->saved_values)); 1580 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1581 1582 PetscCall(MatDestroy(&a->sbaijMat)); 1583 PetscCall(MatDestroy(&a->parent)); 1584 PetscCall(PetscFree(A->data)); 1585 1586 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1590 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1592 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1595 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1596 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1597 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1598 #if defined(PETSC_HAVE_HYPRE) 1599 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1600 #endif 1601 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1602 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 static PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1607 { 1608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1609 1610 PetscFunctionBegin; 1611 switch (op) { 1612 case MAT_ROW_ORIENTED: 1613 a->roworiented = flg; 1614 break; 1615 case MAT_KEEP_NONZERO_PATTERN: 1616 a->keepnonzeropattern = flg; 1617 break; 1618 case MAT_NEW_NONZERO_LOCATIONS: 1619 a->nonew = (flg ? 0 : 1); 1620 break; 1621 case MAT_NEW_NONZERO_LOCATION_ERR: 1622 a->nonew = (flg ? -1 : 0); 1623 break; 1624 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1625 a->nonew = (flg ? -2 : 0); 1626 break; 1627 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1628 a->nounused = (flg ? -1 : 0); 1629 break; 1630 case MAT_FORCE_DIAGONAL_ENTRIES: 1631 case MAT_IGNORE_OFF_PROC_ENTRIES: 1632 case MAT_USE_HASH_TABLE: 1633 case MAT_SORTED_FULL: 1634 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1635 break; 1636 case MAT_SPD: 1637 case MAT_SYMMETRIC: 1638 case MAT_STRUCTURALLY_SYMMETRIC: 1639 case MAT_HERMITIAN: 1640 case MAT_SYMMETRY_ETERNAL: 1641 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1642 case MAT_SUBMAT_SINGLEIS: 1643 case MAT_STRUCTURE_ONLY: 1644 case MAT_SPD_ETERNAL: 1645 /* if the diagonal matrix is square it inherits some of the properties above */ 1646 break; 1647 default: 1648 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1649 } 1650 PetscFunctionReturn(PETSC_SUCCESS); 1651 } 1652 1653 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1654 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1655 { 1656 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1657 MatScalar *aa_i; 1658 PetscScalar *v_i; 1659 1660 PetscFunctionBegin; 1661 bs = A->rmap->bs; 1662 bs2 = bs * bs; 1663 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1664 1665 bn = row / bs; /* Block number */ 1666 bp = row % bs; /* Block Position */ 1667 M = ai[bn + 1] - ai[bn]; 1668 *nz = bs * M; 1669 1670 if (v) { 1671 *v = NULL; 1672 if (*nz) { 1673 PetscCall(PetscMalloc1(*nz, v)); 1674 for (i = 0; i < M; i++) { /* for each block in the block row */ 1675 v_i = *v + i * bs; 1676 aa_i = aa + bs2 * (ai[bn] + i); 1677 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1678 } 1679 } 1680 } 1681 1682 if (idx) { 1683 *idx = NULL; 1684 if (*nz) { 1685 PetscCall(PetscMalloc1(*nz, idx)); 1686 for (i = 0; i < M; i++) { /* for each block in the block row */ 1687 idx_i = *idx + i * bs; 1688 itmp = bs * aj[ai[bn] + i]; 1689 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1690 } 1691 } 1692 } 1693 PetscFunctionReturn(PETSC_SUCCESS); 1694 } 1695 1696 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1697 { 1698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1699 1700 PetscFunctionBegin; 1701 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1702 PetscFunctionReturn(PETSC_SUCCESS); 1703 } 1704 1705 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1706 { 1707 PetscFunctionBegin; 1708 if (idx) PetscCall(PetscFree(*idx)); 1709 if (v) PetscCall(PetscFree(*v)); 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 static PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1714 { 1715 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1716 Mat C; 1717 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1718 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1719 MatScalar *ata, *aa = a->a; 1720 1721 PetscFunctionBegin; 1722 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1723 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1724 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1725 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1726 1727 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1728 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1729 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1730 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1731 1732 at = (Mat_SeqBAIJ *)C->data; 1733 ati = at->i; 1734 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1735 } else { 1736 C = *B; 1737 at = (Mat_SeqBAIJ *)C->data; 1738 ati = at->i; 1739 } 1740 1741 atj = at->j; 1742 ata = at->a; 1743 1744 /* Copy ati into atfill so we have locations of the next free space in atj */ 1745 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1746 1747 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1748 for (i = 0; i < mbs; i++) { 1749 anzj = ai[i + 1] - ai[i]; 1750 for (j = 0; j < anzj; j++) { 1751 atj[atfill[*aj]] = i; 1752 for (kr = 0; kr < bs; kr++) { 1753 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1754 } 1755 atfill[*aj++] += 1; 1756 } 1757 } 1758 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1759 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1760 1761 /* Clean up temporary space and complete requests. */ 1762 PetscCall(PetscFree(atfill)); 1763 1764 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1765 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1766 *B = C; 1767 } else { 1768 PetscCall(MatHeaderMerge(A, &C)); 1769 } 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1774 { 1775 Mat Btrans; 1776 1777 PetscFunctionBegin; 1778 *f = PETSC_FALSE; 1779 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1780 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1781 PetscCall(MatDestroy(&Btrans)); 1782 PetscFunctionReturn(PETSC_SUCCESS); 1783 } 1784 1785 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1786 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1787 { 1788 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1789 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1790 PetscInt *rowlens, *colidxs; 1791 PetscScalar *matvals; 1792 1793 PetscFunctionBegin; 1794 PetscCall(PetscViewerSetUp(viewer)); 1795 1796 M = mat->rmap->N; 1797 N = mat->cmap->N; 1798 m = mat->rmap->n; 1799 bs = mat->rmap->bs; 1800 nz = bs * bs * A->nz; 1801 1802 /* write matrix header */ 1803 header[0] = MAT_FILE_CLASSID; 1804 header[1] = M; 1805 header[2] = N; 1806 header[3] = nz; 1807 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1808 1809 /* store row lengths */ 1810 PetscCall(PetscMalloc1(m, &rowlens)); 1811 for (cnt = 0, i = 0; i < A->mbs; i++) 1812 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1813 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1814 PetscCall(PetscFree(rowlens)); 1815 1816 /* store column indices */ 1817 PetscCall(PetscMalloc1(nz, &colidxs)); 1818 for (cnt = 0, i = 0; i < A->mbs; i++) 1819 for (k = 0; k < bs; k++) 1820 for (j = A->i[i]; j < A->i[i + 1]; j++) 1821 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1822 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1823 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1824 PetscCall(PetscFree(colidxs)); 1825 1826 /* store nonzero values */ 1827 PetscCall(PetscMalloc1(nz, &matvals)); 1828 for (cnt = 0, i = 0; i < A->mbs; i++) 1829 for (k = 0; k < bs; k++) 1830 for (j = A->i[i]; j < A->i[i + 1]; j++) 1831 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1832 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1833 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1834 PetscCall(PetscFree(matvals)); 1835 1836 /* write block size option to the viewer's .info file */ 1837 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1838 PetscFunctionReturn(PETSC_SUCCESS); 1839 } 1840 1841 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1842 { 1843 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1844 PetscInt i, bs = A->rmap->bs, k; 1845 1846 PetscFunctionBegin; 1847 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1848 for (i = 0; i < a->mbs; i++) { 1849 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1850 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1851 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1852 } 1853 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1854 PetscFunctionReturn(PETSC_SUCCESS); 1855 } 1856 1857 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1858 { 1859 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1860 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1861 PetscViewerFormat format; 1862 1863 PetscFunctionBegin; 1864 if (A->structure_only) { 1865 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1866 PetscFunctionReturn(PETSC_SUCCESS); 1867 } 1868 1869 PetscCall(PetscViewerGetFormat(viewer, &format)); 1870 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1871 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1872 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1873 const char *matname; 1874 Mat aij; 1875 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1876 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1877 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1878 PetscCall(MatView(aij, viewer)); 1879 PetscCall(MatDestroy(&aij)); 1880 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1883 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1884 for (i = 0; i < a->mbs; i++) { 1885 for (j = 0; j < bs; j++) { 1886 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1887 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1888 for (l = 0; l < bs; l++) { 1889 #if defined(PETSC_USE_COMPLEX) 1890 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1891 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1892 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1893 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1894 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1895 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1896 } 1897 #else 1898 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1899 #endif 1900 } 1901 } 1902 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1903 } 1904 } 1905 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1906 } else { 1907 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1908 for (i = 0; i < a->mbs; i++) { 1909 for (j = 0; j < bs; j++) { 1910 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1911 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1912 for (l = 0; l < bs; l++) { 1913 #if defined(PETSC_USE_COMPLEX) 1914 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1915 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1916 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1917 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1918 } else { 1919 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1920 } 1921 #else 1922 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1923 #endif 1924 } 1925 } 1926 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1927 } 1928 } 1929 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1930 } 1931 PetscCall(PetscViewerFlush(viewer)); 1932 PetscFunctionReturn(PETSC_SUCCESS); 1933 } 1934 1935 #include <petscdraw.h> 1936 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1937 { 1938 Mat A = (Mat)Aa; 1939 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1940 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1941 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1942 MatScalar *aa; 1943 PetscViewer viewer; 1944 PetscViewerFormat format; 1945 1946 PetscFunctionBegin; 1947 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1948 PetscCall(PetscViewerGetFormat(viewer, &format)); 1949 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1950 1951 /* loop over matrix elements drawing boxes */ 1952 1953 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1954 PetscDrawCollectiveBegin(draw); 1955 /* Blue for negative, Cyan for zero and Red for positive */ 1956 color = PETSC_DRAW_BLUE; 1957 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1958 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1959 y_l = A->rmap->N - row - 1.0; 1960 y_r = y_l + 1.0; 1961 x_l = a->j[j] * bs; 1962 x_r = x_l + 1.0; 1963 aa = a->a + j * bs2; 1964 for (k = 0; k < bs; k++) { 1965 for (l = 0; l < bs; l++) { 1966 if (PetscRealPart(*aa++) >= 0.) continue; 1967 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1968 } 1969 } 1970 } 1971 } 1972 color = PETSC_DRAW_CYAN; 1973 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1974 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1975 y_l = A->rmap->N - row - 1.0; 1976 y_r = y_l + 1.0; 1977 x_l = a->j[j] * bs; 1978 x_r = x_l + 1.0; 1979 aa = a->a + j * bs2; 1980 for (k = 0; k < bs; k++) { 1981 for (l = 0; l < bs; l++) { 1982 if (PetscRealPart(*aa++) != 0.) continue; 1983 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1984 } 1985 } 1986 } 1987 } 1988 color = PETSC_DRAW_RED; 1989 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1990 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1991 y_l = A->rmap->N - row - 1.0; 1992 y_r = y_l + 1.0; 1993 x_l = a->j[j] * bs; 1994 x_r = x_l + 1.0; 1995 aa = a->a + j * bs2; 1996 for (k = 0; k < bs; k++) { 1997 for (l = 0; l < bs; l++) { 1998 if (PetscRealPart(*aa++) <= 0.) continue; 1999 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2000 } 2001 } 2002 } 2003 } 2004 PetscDrawCollectiveEnd(draw); 2005 } else { 2006 /* use contour shading to indicate magnitude of values */ 2007 /* first determine max of all nonzero values */ 2008 PetscReal minv = 0.0, maxv = 0.0; 2009 PetscDraw popup; 2010 2011 for (i = 0; i < a->nz * a->bs2; i++) { 2012 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2013 } 2014 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2015 PetscCall(PetscDrawGetPopup(draw, &popup)); 2016 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2017 2018 PetscDrawCollectiveBegin(draw); 2019 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2020 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2021 y_l = A->rmap->N - row - 1.0; 2022 y_r = y_l + 1.0; 2023 x_l = a->j[j] * bs; 2024 x_r = x_l + 1.0; 2025 aa = a->a + j * bs2; 2026 for (k = 0; k < bs; k++) { 2027 for (l = 0; l < bs; l++) { 2028 MatScalar v = *aa++; 2029 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2030 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2031 } 2032 } 2033 } 2034 } 2035 PetscDrawCollectiveEnd(draw); 2036 } 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2041 { 2042 PetscReal xl, yl, xr, yr, w, h; 2043 PetscDraw draw; 2044 PetscBool isnull; 2045 2046 PetscFunctionBegin; 2047 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2048 PetscCall(PetscDrawIsNull(draw, &isnull)); 2049 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2050 2051 xr = A->cmap->n; 2052 yr = A->rmap->N; 2053 h = yr / 10.0; 2054 w = xr / 10.0; 2055 xr += w; 2056 yr += h; 2057 xl = -w; 2058 yl = -h; 2059 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2060 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2061 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2062 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2063 PetscCall(PetscDrawSave(draw)); 2064 PetscFunctionReturn(PETSC_SUCCESS); 2065 } 2066 2067 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2068 { 2069 PetscBool iascii, isbinary, isdraw; 2070 2071 PetscFunctionBegin; 2072 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2073 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2074 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2075 if (iascii) { 2076 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2077 } else if (isbinary) { 2078 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2079 } else if (isdraw) { 2080 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2081 } else { 2082 Mat B; 2083 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2084 PetscCall(MatView(B, viewer)); 2085 PetscCall(MatDestroy(&B)); 2086 } 2087 PetscFunctionReturn(PETSC_SUCCESS); 2088 } 2089 2090 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2091 { 2092 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2093 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2094 PetscInt *ai = a->i, *ailen = a->ilen; 2095 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2096 MatScalar *ap, *aa = a->a; 2097 2098 PetscFunctionBegin; 2099 for (k = 0; k < m; k++) { /* loop over rows */ 2100 row = im[k]; 2101 brow = row / bs; 2102 if (row < 0) { 2103 v += n; 2104 continue; 2105 } /* negative row */ 2106 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2107 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2108 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2109 nrow = ailen[brow]; 2110 for (l = 0; l < n; l++) { /* loop over columns */ 2111 if (in[l] < 0) { 2112 v++; 2113 continue; 2114 } /* negative column */ 2115 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2116 col = in[l]; 2117 bcol = col / bs; 2118 cidx = col % bs; 2119 ridx = row % bs; 2120 high = nrow; 2121 low = 0; /* assume unsorted */ 2122 while (high - low > 5) { 2123 t = (low + high) / 2; 2124 if (rp[t] > bcol) high = t; 2125 else low = t; 2126 } 2127 for (i = low; i < high; i++) { 2128 if (rp[i] > bcol) break; 2129 if (rp[i] == bcol) { 2130 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2131 goto finished; 2132 } 2133 } 2134 *v++ = 0.0; 2135 finished:; 2136 } 2137 } 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2142 { 2143 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2144 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2145 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2146 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2147 PetscBool roworiented = a->roworiented; 2148 const PetscScalar *value = v; 2149 MatScalar *ap = NULL, *aa = a->a, *bap; 2150 2151 PetscFunctionBegin; 2152 if (roworiented) { 2153 stepval = (n - 1) * bs; 2154 } else { 2155 stepval = (m - 1) * bs; 2156 } 2157 for (k = 0; k < m; k++) { /* loop over added rows */ 2158 row = im[k]; 2159 if (row < 0) continue; 2160 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2161 rp = aj + ai[row]; 2162 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2163 rmax = imax[row]; 2164 nrow = ailen[row]; 2165 low = 0; 2166 high = nrow; 2167 for (l = 0; l < n; l++) { /* loop over added columns */ 2168 if (in[l] < 0) continue; 2169 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2170 col = in[l]; 2171 if (!A->structure_only) { 2172 if (roworiented) { 2173 value = v + (k * (stepval + bs) + l) * bs; 2174 } else { 2175 value = v + (l * (stepval + bs) + k) * bs; 2176 } 2177 } 2178 if (col <= lastcol) low = 0; 2179 else high = nrow; 2180 lastcol = col; 2181 while (high - low > 7) { 2182 t = (low + high) / 2; 2183 if (rp[t] > col) high = t; 2184 else low = t; 2185 } 2186 for (i = low; i < high; i++) { 2187 if (rp[i] > col) break; 2188 if (rp[i] == col) { 2189 if (A->structure_only) goto noinsert2; 2190 bap = ap + bs2 * i; 2191 if (roworiented) { 2192 if (is == ADD_VALUES) { 2193 for (ii = 0; ii < bs; ii++, value += stepval) { 2194 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2195 } 2196 } else { 2197 for (ii = 0; ii < bs; ii++, value += stepval) { 2198 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2199 } 2200 } 2201 } else { 2202 if (is == ADD_VALUES) { 2203 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2204 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2205 bap += bs; 2206 } 2207 } else { 2208 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2209 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2210 bap += bs; 2211 } 2212 } 2213 } 2214 goto noinsert2; 2215 } 2216 } 2217 if (nonew == 1) goto noinsert2; 2218 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2219 if (A->structure_only) { 2220 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2221 } else { 2222 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2223 } 2224 N = nrow++ - 1; 2225 high++; 2226 /* shift up all the later entries in this row */ 2227 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2228 rp[i] = col; 2229 if (!A->structure_only) { 2230 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2231 bap = ap + bs2 * i; 2232 if (roworiented) { 2233 for (ii = 0; ii < bs; ii++, value += stepval) { 2234 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2235 } 2236 } else { 2237 for (ii = 0; ii < bs; ii++, value += stepval) { 2238 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2239 } 2240 } 2241 } 2242 noinsert2:; 2243 low = i; 2244 } 2245 ailen[row] = nrow; 2246 } 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2251 { 2252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2253 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2254 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2255 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2256 MatScalar *aa = a->a, *ap; 2257 PetscReal ratio = 0.6; 2258 2259 PetscFunctionBegin; 2260 if (mode == MAT_FLUSH_ASSEMBLY || (A->was_assembled && A->ass_nonzerostate == A->nonzerostate)) PetscFunctionReturn(PETSC_SUCCESS); 2261 2262 if (m) rmax = ailen[0]; 2263 for (i = 1; i < mbs; i++) { 2264 /* move each row back by the amount of empty slots (fshift) before it*/ 2265 fshift += imax[i - 1] - ailen[i - 1]; 2266 rmax = PetscMax(rmax, ailen[i]); 2267 if (fshift) { 2268 ip = aj + ai[i]; 2269 ap = aa + bs2 * ai[i]; 2270 N = ailen[i]; 2271 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2272 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2273 } 2274 ai[i] = ai[i - 1] + ailen[i - 1]; 2275 } 2276 if (mbs) { 2277 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2278 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2279 } 2280 2281 /* reset ilen and imax for each row */ 2282 a->nonzerorowcnt = 0; 2283 if (A->structure_only) { 2284 PetscCall(PetscFree2(a->imax, a->ilen)); 2285 } else { /* !A->structure_only */ 2286 for (i = 0; i < mbs; i++) { 2287 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2288 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2289 } 2290 } 2291 a->nz = ai[mbs]; 2292 2293 /* diagonals may have moved, so kill the diagonal pointers */ 2294 a->idiagvalid = PETSC_FALSE; 2295 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2296 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2297 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2298 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2299 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2300 2301 A->info.mallocs += a->reallocs; 2302 a->reallocs = 0; 2303 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2304 a->rmax = rmax; 2305 2306 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2307 PetscFunctionReturn(PETSC_SUCCESS); 2308 } 2309 2310 /* 2311 This function returns an array of flags which indicate the locations of contiguous 2312 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2313 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2314 Assume: sizes should be long enough to hold all the values. 2315 */ 2316 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2317 { 2318 PetscInt j = 0; 2319 2320 PetscFunctionBegin; 2321 for (PetscInt i = 0; i < n; j++) { 2322 PetscInt row = idx[i]; 2323 if (row % bs != 0) { /* Not the beginning of a block */ 2324 sizes[j] = 1; 2325 i++; 2326 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2327 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2328 i++; 2329 } else { /* Beginning of the block, so check if the complete block exists */ 2330 PetscBool flg = PETSC_TRUE; 2331 for (PetscInt k = 1; k < bs; k++) { 2332 if (row + k != idx[i + k]) { /* break in the block */ 2333 flg = PETSC_FALSE; 2334 break; 2335 } 2336 } 2337 if (flg) { /* No break in the bs */ 2338 sizes[j] = bs; 2339 i += bs; 2340 } else { 2341 sizes[j] = 1; 2342 i++; 2343 } 2344 } 2345 } 2346 *bs_max = j; 2347 PetscFunctionReturn(PETSC_SUCCESS); 2348 } 2349 2350 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2351 { 2352 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2353 PetscInt i, j, k, count, *rows; 2354 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2355 PetscScalar zero = 0.0; 2356 MatScalar *aa; 2357 const PetscScalar *xx; 2358 PetscScalar *bb; 2359 2360 PetscFunctionBegin; 2361 /* fix right hand side if needed */ 2362 if (x && b) { 2363 PetscCall(VecGetArrayRead(x, &xx)); 2364 PetscCall(VecGetArray(b, &bb)); 2365 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2366 PetscCall(VecRestoreArrayRead(x, &xx)); 2367 PetscCall(VecRestoreArray(b, &bb)); 2368 } 2369 2370 /* Make a copy of the IS and sort it */ 2371 /* allocate memory for rows,sizes */ 2372 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2373 2374 /* copy IS values to rows, and sort them */ 2375 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2376 PetscCall(PetscSortInt(is_n, rows)); 2377 2378 if (baij->keepnonzeropattern) { 2379 for (i = 0; i < is_n; i++) sizes[i] = 1; 2380 bs_max = is_n; 2381 } else { 2382 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2383 A->nonzerostate++; 2384 } 2385 2386 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2387 row = rows[j]; 2388 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2389 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2390 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2391 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2392 if (diag != (PetscScalar)0.0) { 2393 if (baij->ilen[row / bs] > 0) { 2394 baij->ilen[row / bs] = 1; 2395 baij->j[baij->i[row / bs]] = row / bs; 2396 2397 PetscCall(PetscArrayzero(aa, count * bs)); 2398 } 2399 /* Now insert all the diagonal values for this bs */ 2400 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2401 } else { /* (diag == 0.0) */ 2402 baij->ilen[row / bs] = 0; 2403 } /* end (diag == 0.0) */ 2404 } else { /* (sizes[i] != bs) */ 2405 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2406 for (k = 0; k < count; k++) { 2407 aa[0] = zero; 2408 aa += bs; 2409 } 2410 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2411 } 2412 } 2413 2414 PetscCall(PetscFree2(rows, sizes)); 2415 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 2419 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2420 { 2421 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2422 PetscInt i, j, k, count; 2423 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2424 PetscScalar zero = 0.0; 2425 MatScalar *aa; 2426 const PetscScalar *xx; 2427 PetscScalar *bb; 2428 PetscBool *zeroed, vecs = PETSC_FALSE; 2429 2430 PetscFunctionBegin; 2431 /* fix right hand side if needed */ 2432 if (x && b) { 2433 PetscCall(VecGetArrayRead(x, &xx)); 2434 PetscCall(VecGetArray(b, &bb)); 2435 vecs = PETSC_TRUE; 2436 } 2437 2438 /* zero the columns */ 2439 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2440 for (i = 0; i < is_n; i++) { 2441 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2442 zeroed[is_idx[i]] = PETSC_TRUE; 2443 } 2444 for (i = 0; i < A->rmap->N; i++) { 2445 if (!zeroed[i]) { 2446 row = i / bs; 2447 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2448 for (k = 0; k < bs; k++) { 2449 col = bs * baij->j[j] + k; 2450 if (zeroed[col]) { 2451 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2452 if (vecs) bb[i] -= aa[0] * xx[col]; 2453 aa[0] = 0.0; 2454 } 2455 } 2456 } 2457 } else if (vecs) bb[i] = diag * xx[i]; 2458 } 2459 PetscCall(PetscFree(zeroed)); 2460 if (vecs) { 2461 PetscCall(VecRestoreArrayRead(x, &xx)); 2462 PetscCall(VecRestoreArray(b, &bb)); 2463 } 2464 2465 /* zero the rows */ 2466 for (i = 0; i < is_n; i++) { 2467 row = is_idx[i]; 2468 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2469 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2470 for (k = 0; k < count; k++) { 2471 aa[0] = zero; 2472 aa += bs; 2473 } 2474 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2475 } 2476 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2477 PetscFunctionReturn(PETSC_SUCCESS); 2478 } 2479 2480 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2481 { 2482 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2483 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2484 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2485 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2486 PetscInt ridx, cidx, bs2 = a->bs2; 2487 PetscBool roworiented = a->roworiented; 2488 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2489 2490 PetscFunctionBegin; 2491 for (k = 0; k < m; k++) { /* loop over added rows */ 2492 row = im[k]; 2493 brow = row / bs; 2494 if (row < 0) continue; 2495 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2496 rp = aj + ai[brow]; 2497 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2498 rmax = imax[brow]; 2499 nrow = ailen[brow]; 2500 low = 0; 2501 high = nrow; 2502 for (l = 0; l < n; l++) { /* loop over added columns */ 2503 if (in[l] < 0) continue; 2504 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2505 col = in[l]; 2506 bcol = col / bs; 2507 ridx = row % bs; 2508 cidx = col % bs; 2509 if (!A->structure_only) { 2510 if (roworiented) { 2511 value = v[l + k * n]; 2512 } else { 2513 value = v[k + l * m]; 2514 } 2515 } 2516 if (col <= lastcol) low = 0; 2517 else high = nrow; 2518 lastcol = col; 2519 while (high - low > 7) { 2520 t = (low + high) / 2; 2521 if (rp[t] > bcol) high = t; 2522 else low = t; 2523 } 2524 for (i = low; i < high; i++) { 2525 if (rp[i] > bcol) break; 2526 if (rp[i] == bcol) { 2527 bap = ap + bs2 * i + bs * cidx + ridx; 2528 if (!A->structure_only) { 2529 if (is == ADD_VALUES) *bap += value; 2530 else *bap = value; 2531 } 2532 goto noinsert1; 2533 } 2534 } 2535 if (nonew == 1) goto noinsert1; 2536 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2537 if (A->structure_only) { 2538 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2539 } else { 2540 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2541 } 2542 N = nrow++ - 1; 2543 high++; 2544 /* shift up all the later entries in this row */ 2545 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2546 rp[i] = bcol; 2547 if (!A->structure_only) { 2548 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2549 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2550 ap[bs2 * i + bs * cidx + ridx] = value; 2551 } 2552 a->nz++; 2553 A->nonzerostate++; 2554 noinsert1:; 2555 low = i; 2556 } 2557 ailen[brow] = nrow; 2558 } 2559 PetscFunctionReturn(PETSC_SUCCESS); 2560 } 2561 2562 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2563 { 2564 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2565 Mat outA; 2566 PetscBool row_identity, col_identity; 2567 2568 PetscFunctionBegin; 2569 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2570 PetscCall(ISIdentity(row, &row_identity)); 2571 PetscCall(ISIdentity(col, &col_identity)); 2572 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2573 2574 outA = inA; 2575 inA->factortype = MAT_FACTOR_LU; 2576 PetscCall(PetscFree(inA->solvertype)); 2577 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2578 2579 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2580 2581 PetscCall(PetscObjectReference((PetscObject)row)); 2582 PetscCall(ISDestroy(&a->row)); 2583 a->row = row; 2584 PetscCall(PetscObjectReference((PetscObject)col)); 2585 PetscCall(ISDestroy(&a->col)); 2586 a->col = col; 2587 2588 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2589 PetscCall(ISDestroy(&a->icol)); 2590 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2591 2592 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2593 if (!a->solve_work) PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); 2594 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2599 { 2600 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2601 2602 PetscFunctionBegin; 2603 baij->nz = baij->maxnz; 2604 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2605 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608 2609 /*@ 2610 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2611 2612 Input Parameters: 2613 + mat - the `MATSEQBAIJ` matrix 2614 - indices - the column indices 2615 2616 Level: advanced 2617 2618 Notes: 2619 This can be called if you have precomputed the nonzero structure of the 2620 matrix and want to provide it to the matrix object to improve the performance 2621 of the `MatSetValues()` operation. 2622 2623 You MUST have set the correct numbers of nonzeros per row in the call to 2624 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2625 2626 MUST be called before any calls to `MatSetValues()` 2627 2628 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSetValues()` 2629 @*/ 2630 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2631 { 2632 PetscFunctionBegin; 2633 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2634 PetscAssertPointer(indices, 2); 2635 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 static PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2640 { 2641 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2642 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2643 PetscReal atmp; 2644 PetscScalar *x, zero = 0.0; 2645 MatScalar *aa; 2646 PetscInt ncols, brow, krow, kcol; 2647 2648 PetscFunctionBegin; 2649 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2650 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2651 bs = A->rmap->bs; 2652 aa = a->a; 2653 ai = a->i; 2654 aj = a->j; 2655 mbs = a->mbs; 2656 2657 PetscCall(VecSet(v, zero)); 2658 PetscCall(VecGetArray(v, &x)); 2659 PetscCall(VecGetLocalSize(v, &n)); 2660 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2661 for (i = 0; i < mbs; i++) { 2662 ncols = ai[1] - ai[0]; 2663 ai++; 2664 brow = bs * i; 2665 for (j = 0; j < ncols; j++) { 2666 for (kcol = 0; kcol < bs; kcol++) { 2667 for (krow = 0; krow < bs; krow++) { 2668 atmp = PetscAbsScalar(*aa); 2669 aa++; 2670 row = brow + krow; /* row index */ 2671 if (PetscAbsScalar(x[row]) < atmp) { 2672 x[row] = atmp; 2673 if (idx) idx[row] = bs * (*aj) + kcol; 2674 } 2675 } 2676 } 2677 aj++; 2678 } 2679 } 2680 PetscCall(VecRestoreArray(v, &x)); 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 static PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2685 { 2686 PetscFunctionBegin; 2687 /* If the two matrices have the same copy implementation, use fast copy. */ 2688 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2689 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2690 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2691 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2692 2693 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2694 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2695 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2696 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2697 } else { 2698 PetscCall(MatCopy_Basic(A, B, str)); 2699 } 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2704 { 2705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2706 2707 PetscFunctionBegin; 2708 *array = a->a; 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2713 { 2714 PetscFunctionBegin; 2715 *array = NULL; 2716 PetscFunctionReturn(PETSC_SUCCESS); 2717 } 2718 2719 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2720 { 2721 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2722 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2723 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2724 2725 PetscFunctionBegin; 2726 /* Set the number of nonzeros in the new matrix */ 2727 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2728 PetscFunctionReturn(PETSC_SUCCESS); 2729 } 2730 2731 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2732 { 2733 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2734 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2735 PetscBLASInt one = 1; 2736 2737 PetscFunctionBegin; 2738 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2739 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2740 if (e) { 2741 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2742 if (e) { 2743 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2744 if (e) str = SAME_NONZERO_PATTERN; 2745 } 2746 } 2747 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2748 } 2749 if (str == SAME_NONZERO_PATTERN) { 2750 PetscScalar alpha = a; 2751 PetscBLASInt bnz; 2752 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2753 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2754 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2755 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2756 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2757 } else { 2758 Mat B; 2759 PetscInt *nnz; 2760 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2761 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2762 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2763 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2764 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2765 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2766 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2767 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2768 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2769 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2770 PetscCall(MatHeaderMerge(Y, &B)); 2771 PetscCall(PetscFree(nnz)); 2772 } 2773 PetscFunctionReturn(PETSC_SUCCESS); 2774 } 2775 2776 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2777 { 2778 #if PetscDefined(USE_COMPLEX) 2779 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2780 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2781 MatScalar *aa = a->a; 2782 2783 PetscFunctionBegin; 2784 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2785 PetscFunctionReturn(PETSC_SUCCESS); 2786 #else 2787 (void)A; 2788 return PETSC_SUCCESS; 2789 #endif 2790 } 2791 2792 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2793 { 2794 #if PetscDefined(USE_COMPLEX) 2795 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2796 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2797 MatScalar *aa = a->a; 2798 2799 PetscFunctionBegin; 2800 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2801 PetscFunctionReturn(PETSC_SUCCESS); 2802 #else 2803 (void)A; 2804 return PETSC_SUCCESS; 2805 #endif 2806 } 2807 2808 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2809 { 2810 #if PetscDefined(USE_COMPLEX) 2811 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2812 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2813 MatScalar *aa = a->a; 2814 2815 PetscFunctionBegin; 2816 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2817 PetscFunctionReturn(PETSC_SUCCESS); 2818 #else 2819 (void)A; 2820 return PETSC_SUCCESS; 2821 #endif 2822 } 2823 2824 /* 2825 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2826 */ 2827 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2828 { 2829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2830 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2831 PetscInt nz = a->i[m], row, *jj, mr, col; 2832 2833 PetscFunctionBegin; 2834 *nn = n; 2835 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2836 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2837 PetscCall(PetscCalloc1(n, &collengths)); 2838 PetscCall(PetscMalloc1(n + 1, &cia)); 2839 PetscCall(PetscMalloc1(nz, &cja)); 2840 jj = a->j; 2841 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2842 cia[0] = oshift; 2843 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2844 PetscCall(PetscArrayzero(collengths, n)); 2845 jj = a->j; 2846 for (row = 0; row < m; row++) { 2847 mr = a->i[row + 1] - a->i[row]; 2848 for (i = 0; i < mr; i++) { 2849 col = *jj++; 2850 2851 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2852 } 2853 } 2854 PetscCall(PetscFree(collengths)); 2855 *ia = cia; 2856 *ja = cja; 2857 PetscFunctionReturn(PETSC_SUCCESS); 2858 } 2859 2860 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2861 { 2862 PetscFunctionBegin; 2863 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2864 PetscCall(PetscFree(*ia)); 2865 PetscCall(PetscFree(*ja)); 2866 PetscFunctionReturn(PETSC_SUCCESS); 2867 } 2868 2869 /* 2870 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2871 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2872 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2873 */ 2874 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2875 { 2876 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2877 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2878 PetscInt nz = a->i[m], row, *jj, mr, col; 2879 PetscInt *cspidx; 2880 2881 PetscFunctionBegin; 2882 *nn = n; 2883 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2884 2885 PetscCall(PetscCalloc1(n, &collengths)); 2886 PetscCall(PetscMalloc1(n + 1, &cia)); 2887 PetscCall(PetscMalloc1(nz, &cja)); 2888 PetscCall(PetscMalloc1(nz, &cspidx)); 2889 jj = a->j; 2890 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2891 cia[0] = oshift; 2892 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2893 PetscCall(PetscArrayzero(collengths, n)); 2894 jj = a->j; 2895 for (row = 0; row < m; row++) { 2896 mr = a->i[row + 1] - a->i[row]; 2897 for (i = 0; i < mr; i++) { 2898 col = *jj++; 2899 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2900 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2901 } 2902 } 2903 PetscCall(PetscFree(collengths)); 2904 *ia = cia; 2905 *ja = cja; 2906 *spidx = cspidx; 2907 PetscFunctionReturn(PETSC_SUCCESS); 2908 } 2909 2910 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2911 { 2912 PetscFunctionBegin; 2913 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2914 PetscCall(PetscFree(*spidx)); 2915 PetscFunctionReturn(PETSC_SUCCESS); 2916 } 2917 2918 static PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2919 { 2920 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2921 2922 PetscFunctionBegin; 2923 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2924 PetscCall(MatShift_Basic(Y, a)); 2925 PetscFunctionReturn(PETSC_SUCCESS); 2926 } 2927 2928 PetscErrorCode MatEliminateZeros_SeqBAIJ(Mat A, PetscBool keep) 2929 { 2930 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2931 PetscInt fshift = 0, fshift_prev = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax, j, k; 2932 PetscInt m = A->rmap->N, *ailen = a->ilen; 2933 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2934 MatScalar *aa = a->a, *ap; 2935 PetscBool zero; 2936 2937 PetscFunctionBegin; 2938 PetscCheck(A->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot eliminate zeros for unassembled matrix"); 2939 if (m) rmax = ailen[0]; 2940 for (i = 1; i <= mbs; i++) { 2941 for (k = ai[i - 1]; k < ai[i]; k++) { 2942 zero = PETSC_TRUE; 2943 ap = aa + bs2 * k; 2944 for (j = 0; j < bs2 && zero; j++) { 2945 if (ap[j] != 0.0) zero = PETSC_FALSE; 2946 } 2947 if (zero && (aj[k] != i - 1 || !keep)) fshift++; 2948 else { 2949 if (zero && aj[k] == i - 1) PetscCall(PetscInfo(A, "Keep the diagonal block at row %" PetscInt_FMT "\n", i - 1)); 2950 aj[k - fshift] = aj[k]; 2951 PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2)); 2952 } 2953 } 2954 ai[i - 1] -= fshift_prev; 2955 fshift_prev = fshift; 2956 ailen[i - 1] = imax[i - 1] = ai[i] - fshift - ai[i - 1]; 2957 a->nonzerorowcnt += ((ai[i] - fshift - ai[i - 1]) > 0); 2958 rmax = PetscMax(rmax, ailen[i - 1]); 2959 } 2960 if (fshift) { 2961 if (mbs) { 2962 ai[mbs] -= fshift; 2963 a->nz = ai[mbs]; 2964 } 2965 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT "; zeros eliminated: %" PetscInt_FMT "; nonzeros left: %" PetscInt_FMT "\n", m, A->cmap->n, fshift, a->nz)); 2966 A->nonzerostate++; 2967 A->info.nz_unneeded += (PetscReal)fshift; 2968 a->rmax = rmax; 2969 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2970 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2971 } 2972 PetscFunctionReturn(PETSC_SUCCESS); 2973 } 2974 2975 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2976 MatGetRow_SeqBAIJ, 2977 MatRestoreRow_SeqBAIJ, 2978 MatMult_SeqBAIJ_N, 2979 /* 4*/ MatMultAdd_SeqBAIJ_N, 2980 MatMultTranspose_SeqBAIJ, 2981 MatMultTransposeAdd_SeqBAIJ, 2982 NULL, 2983 NULL, 2984 NULL, 2985 /* 10*/ NULL, 2986 MatLUFactor_SeqBAIJ, 2987 NULL, 2988 NULL, 2989 MatTranspose_SeqBAIJ, 2990 /* 15*/ MatGetInfo_SeqBAIJ, 2991 MatEqual_SeqBAIJ, 2992 MatGetDiagonal_SeqBAIJ, 2993 MatDiagonalScale_SeqBAIJ, 2994 MatNorm_SeqBAIJ, 2995 /* 20*/ NULL, 2996 MatAssemblyEnd_SeqBAIJ, 2997 MatSetOption_SeqBAIJ, 2998 MatZeroEntries_SeqBAIJ, 2999 /* 24*/ MatZeroRows_SeqBAIJ, 3000 NULL, 3001 NULL, 3002 NULL, 3003 NULL, 3004 /* 29*/ MatSetUp_Seq_Hash, 3005 NULL, 3006 NULL, 3007 NULL, 3008 NULL, 3009 /* 34*/ MatDuplicate_SeqBAIJ, 3010 NULL, 3011 NULL, 3012 MatILUFactor_SeqBAIJ, 3013 NULL, 3014 /* 39*/ MatAXPY_SeqBAIJ, 3015 MatCreateSubMatrices_SeqBAIJ, 3016 MatIncreaseOverlap_SeqBAIJ, 3017 MatGetValues_SeqBAIJ, 3018 MatCopy_SeqBAIJ, 3019 /* 44*/ NULL, 3020 MatScale_SeqBAIJ, 3021 MatShift_SeqBAIJ, 3022 NULL, 3023 MatZeroRowsColumns_SeqBAIJ, 3024 /* 49*/ NULL, 3025 MatGetRowIJ_SeqBAIJ, 3026 MatRestoreRowIJ_SeqBAIJ, 3027 MatGetColumnIJ_SeqBAIJ, 3028 MatRestoreColumnIJ_SeqBAIJ, 3029 /* 54*/ MatFDColoringCreate_SeqXAIJ, 3030 NULL, 3031 NULL, 3032 NULL, 3033 MatSetValuesBlocked_SeqBAIJ, 3034 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 3035 MatDestroy_SeqBAIJ, 3036 MatView_SeqBAIJ, 3037 NULL, 3038 NULL, 3039 /* 64*/ NULL, 3040 NULL, 3041 NULL, 3042 NULL, 3043 NULL, 3044 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 3045 NULL, 3046 MatConvert_Basic, 3047 NULL, 3048 NULL, 3049 /* 74*/ NULL, 3050 MatFDColoringApply_BAIJ, 3051 NULL, 3052 NULL, 3053 NULL, 3054 /* 79*/ NULL, 3055 NULL, 3056 NULL, 3057 NULL, 3058 MatLoad_SeqBAIJ, 3059 /* 84*/ NULL, 3060 NULL, 3061 NULL, 3062 NULL, 3063 NULL, 3064 /* 89*/ NULL, 3065 NULL, 3066 NULL, 3067 NULL, 3068 NULL, 3069 /* 94*/ NULL, 3070 NULL, 3071 NULL, 3072 NULL, 3073 NULL, 3074 /* 99*/ NULL, 3075 NULL, 3076 NULL, 3077 MatConjugate_SeqBAIJ, 3078 NULL, 3079 /*104*/ NULL, 3080 MatRealPart_SeqBAIJ, 3081 MatImaginaryPart_SeqBAIJ, 3082 NULL, 3083 NULL, 3084 /*109*/ NULL, 3085 NULL, 3086 NULL, 3087 NULL, 3088 MatMissingDiagonal_SeqBAIJ, 3089 /*114*/ NULL, 3090 NULL, 3091 NULL, 3092 NULL, 3093 NULL, 3094 /*119*/ NULL, 3095 NULL, 3096 MatMultHermitianTranspose_SeqBAIJ, 3097 MatMultHermitianTransposeAdd_SeqBAIJ, 3098 NULL, 3099 /*124*/ NULL, 3100 MatGetColumnReductions_SeqBAIJ, 3101 MatInvertBlockDiagonal_SeqBAIJ, 3102 NULL, 3103 NULL, 3104 /*129*/ NULL, 3105 NULL, 3106 NULL, 3107 NULL, 3108 NULL, 3109 /*134*/ NULL, 3110 NULL, 3111 NULL, 3112 NULL, 3113 NULL, 3114 /*139*/ MatSetBlockSizes_Default, 3115 NULL, 3116 NULL, 3117 MatFDColoringSetUp_SeqXAIJ, 3118 NULL, 3119 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3120 MatDestroySubMatrices_SeqBAIJ, 3121 NULL, 3122 NULL, 3123 NULL, 3124 NULL, 3125 /*150*/ NULL, 3126 MatEliminateZeros_SeqBAIJ}; 3127 3128 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3129 { 3130 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3131 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3132 3133 PetscFunctionBegin; 3134 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3135 3136 /* allocate space for values if not already there */ 3137 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3138 3139 /* copy values over */ 3140 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3141 PetscFunctionReturn(PETSC_SUCCESS); 3142 } 3143 3144 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3145 { 3146 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3147 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3148 3149 PetscFunctionBegin; 3150 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3151 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3152 3153 /* copy values over */ 3154 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3155 PetscFunctionReturn(PETSC_SUCCESS); 3156 } 3157 3158 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3159 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3160 3161 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3162 { 3163 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3164 PetscInt i, mbs, nbs, bs2; 3165 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3166 3167 PetscFunctionBegin; 3168 if (B->hash_active) { 3169 PetscInt bs; 3170 B->ops[0] = b->cops; 3171 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3172 PetscCall(MatGetBlockSize(B, &bs)); 3173 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3174 PetscCall(PetscFree(b->dnz)); 3175 PetscCall(PetscFree(b->bdnz)); 3176 B->hash_active = PETSC_FALSE; 3177 } 3178 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3179 if (nz == MAT_SKIP_ALLOCATION) { 3180 skipallocation = PETSC_TRUE; 3181 nz = 0; 3182 } 3183 3184 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3185 PetscCall(PetscLayoutSetUp(B->rmap)); 3186 PetscCall(PetscLayoutSetUp(B->cmap)); 3187 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3188 3189 B->preallocated = PETSC_TRUE; 3190 3191 mbs = B->rmap->n / bs; 3192 nbs = B->cmap->n / bs; 3193 bs2 = bs * bs; 3194 3195 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3196 3197 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3198 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3199 if (nnz) { 3200 for (i = 0; i < mbs; i++) { 3201 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3202 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3203 } 3204 } 3205 3206 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3207 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3208 PetscOptionsEnd(); 3209 3210 if (!flg) { 3211 switch (bs) { 3212 case 1: 3213 B->ops->mult = MatMult_SeqBAIJ_1; 3214 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3215 break; 3216 case 2: 3217 B->ops->mult = MatMult_SeqBAIJ_2; 3218 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3219 break; 3220 case 3: 3221 B->ops->mult = MatMult_SeqBAIJ_3; 3222 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3223 break; 3224 case 4: 3225 B->ops->mult = MatMult_SeqBAIJ_4; 3226 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3227 break; 3228 case 5: 3229 B->ops->mult = MatMult_SeqBAIJ_5; 3230 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3231 break; 3232 case 6: 3233 B->ops->mult = MatMult_SeqBAIJ_6; 3234 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3235 break; 3236 case 7: 3237 B->ops->mult = MatMult_SeqBAIJ_7; 3238 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3239 break; 3240 case 9: { 3241 PetscInt version = 1; 3242 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3243 switch (version) { 3244 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3245 case 1: 3246 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3247 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3248 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3249 break; 3250 #endif 3251 default: 3252 B->ops->mult = MatMult_SeqBAIJ_N; 3253 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3254 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3255 break; 3256 } 3257 break; 3258 } 3259 case 11: 3260 B->ops->mult = MatMult_SeqBAIJ_11; 3261 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3262 break; 3263 case 12: { 3264 PetscInt version = 1; 3265 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3266 switch (version) { 3267 case 1: 3268 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3269 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3270 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3271 break; 3272 case 2: 3273 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3274 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3275 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3276 break; 3277 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3278 case 3: 3279 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3280 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3281 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3282 break; 3283 #endif 3284 default: 3285 B->ops->mult = MatMult_SeqBAIJ_N; 3286 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3287 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3288 break; 3289 } 3290 break; 3291 } 3292 case 15: { 3293 PetscInt version = 1; 3294 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3295 switch (version) { 3296 case 1: 3297 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3298 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3299 break; 3300 case 2: 3301 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3302 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3303 break; 3304 case 3: 3305 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3306 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3307 break; 3308 case 4: 3309 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3310 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3311 break; 3312 default: 3313 B->ops->mult = MatMult_SeqBAIJ_N; 3314 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3315 break; 3316 } 3317 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3318 break; 3319 } 3320 default: 3321 B->ops->mult = MatMult_SeqBAIJ_N; 3322 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3323 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3324 break; 3325 } 3326 } 3327 B->ops->sor = MatSOR_SeqBAIJ; 3328 b->mbs = mbs; 3329 b->nbs = nbs; 3330 if (!skipallocation) { 3331 if (!b->imax) { 3332 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3333 3334 b->free_imax_ilen = PETSC_TRUE; 3335 } 3336 /* b->ilen will count nonzeros in each block row so far. */ 3337 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3338 if (!nnz) { 3339 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3340 else if (nz < 0) nz = 1; 3341 nz = PetscMin(nz, nbs); 3342 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3343 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3344 } else { 3345 PetscInt64 nz64 = 0; 3346 for (i = 0; i < mbs; i++) { 3347 b->imax[i] = nnz[i]; 3348 nz64 += nnz[i]; 3349 } 3350 PetscCall(PetscIntCast(nz64, &nz)); 3351 } 3352 3353 /* allocate the matrix space */ 3354 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3355 if (B->structure_only) { 3356 PetscCall(PetscMalloc1(nz, &b->j)); 3357 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3358 } else { 3359 PetscInt nzbs2 = 0; 3360 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3361 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3362 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3363 } 3364 PetscCall(PetscArrayzero(b->j, nz)); 3365 3366 if (B->structure_only) { 3367 b->singlemalloc = PETSC_FALSE; 3368 b->free_a = PETSC_FALSE; 3369 } else { 3370 b->singlemalloc = PETSC_TRUE; 3371 b->free_a = PETSC_TRUE; 3372 } 3373 b->free_ij = PETSC_TRUE; 3374 3375 b->i[0] = 0; 3376 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3377 3378 } else { 3379 b->free_a = PETSC_FALSE; 3380 b->free_ij = PETSC_FALSE; 3381 } 3382 3383 b->bs2 = bs2; 3384 b->mbs = mbs; 3385 b->nz = 0; 3386 b->maxnz = nz; 3387 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3388 B->was_assembled = PETSC_FALSE; 3389 B->assembled = PETSC_FALSE; 3390 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3391 PetscFunctionReturn(PETSC_SUCCESS); 3392 } 3393 3394 static PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3395 { 3396 PetscInt i, m, nz, nz_max = 0, *nnz; 3397 PetscScalar *values = NULL; 3398 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3399 3400 PetscFunctionBegin; 3401 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3402 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3403 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3404 PetscCall(PetscLayoutSetUp(B->rmap)); 3405 PetscCall(PetscLayoutSetUp(B->cmap)); 3406 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3407 m = B->rmap->n / bs; 3408 3409 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3410 PetscCall(PetscMalloc1(m + 1, &nnz)); 3411 for (i = 0; i < m; i++) { 3412 nz = ii[i + 1] - ii[i]; 3413 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3414 nz_max = PetscMax(nz_max, nz); 3415 nnz[i] = nz; 3416 } 3417 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3418 PetscCall(PetscFree(nnz)); 3419 3420 values = (PetscScalar *)V; 3421 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3422 for (i = 0; i < m; i++) { 3423 PetscInt ncols = ii[i + 1] - ii[i]; 3424 const PetscInt *icols = jj + ii[i]; 3425 if (bs == 1 || !roworiented) { 3426 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3427 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3428 } else { 3429 PetscInt j; 3430 for (j = 0; j < ncols; j++) { 3431 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3432 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3433 } 3434 } 3435 } 3436 if (!V) PetscCall(PetscFree(values)); 3437 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3438 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3439 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3440 PetscFunctionReturn(PETSC_SUCCESS); 3441 } 3442 3443 /*@C 3444 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3445 3446 Not Collective 3447 3448 Input Parameter: 3449 . A - a `MATSEQBAIJ` matrix 3450 3451 Output Parameter: 3452 . array - pointer to the data 3453 3454 Level: intermediate 3455 3456 .seealso: [](ch_matrices), `Mat`, `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3457 @*/ 3458 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3459 { 3460 PetscFunctionBegin; 3461 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3462 PetscFunctionReturn(PETSC_SUCCESS); 3463 } 3464 3465 /*@C 3466 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3467 3468 Not Collective 3469 3470 Input Parameters: 3471 + A - a `MATSEQBAIJ` matrix 3472 - array - pointer to the data 3473 3474 Level: intermediate 3475 3476 .seealso: [](ch_matrices), `Mat`, `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3477 @*/ 3478 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3479 { 3480 PetscFunctionBegin; 3481 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3482 PetscFunctionReturn(PETSC_SUCCESS); 3483 } 3484 3485 /*MC 3486 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3487 block sparse compressed row format. 3488 3489 Options Database Keys: 3490 + -mat_type seqbaij - sets the matrix type to `MATSEQBAIJ` during a call to `MatSetFromOptions()` 3491 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3492 3493 Level: beginner 3494 3495 Notes: 3496 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3497 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3498 3499 Run with `-info` to see what version of the matrix-vector product is being used 3500 3501 .seealso: [](ch_matrices), `Mat`, `MatCreateSeqBAIJ()` 3502 M*/ 3503 3504 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3505 3506 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3507 { 3508 PetscMPIInt size; 3509 Mat_SeqBAIJ *b; 3510 3511 PetscFunctionBegin; 3512 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3513 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3514 3515 PetscCall(PetscNew(&b)); 3516 B->data = (void *)b; 3517 B->ops[0] = MatOps_Values; 3518 3519 b->row = NULL; 3520 b->col = NULL; 3521 b->icol = NULL; 3522 b->reallocs = 0; 3523 b->saved_values = NULL; 3524 3525 b->roworiented = PETSC_TRUE; 3526 b->nonew = 0; 3527 b->diag = NULL; 3528 B->spptr = NULL; 3529 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3530 b->keepnonzeropattern = PETSC_FALSE; 3531 3532 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3533 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3534 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3535 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3536 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3537 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3538 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3539 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3540 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3541 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3542 #if defined(PETSC_HAVE_HYPRE) 3543 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3544 #endif 3545 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3546 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3547 PetscFunctionReturn(PETSC_SUCCESS); 3548 } 3549 3550 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3551 { 3552 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3553 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3554 3555 PetscFunctionBegin; 3556 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONGSTATE, "Cannot duplicate unassembled matrix"); 3557 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3558 3559 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3560 c->imax = a->imax; 3561 c->ilen = a->ilen; 3562 c->free_imax_ilen = PETSC_FALSE; 3563 } else { 3564 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3565 for (i = 0; i < mbs; i++) { 3566 c->imax[i] = a->imax[i]; 3567 c->ilen[i] = a->ilen[i]; 3568 } 3569 c->free_imax_ilen = PETSC_TRUE; 3570 } 3571 3572 /* allocate the matrix space */ 3573 if (mallocmatspace) { 3574 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3575 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3576 3577 c->i = a->i; 3578 c->j = a->j; 3579 c->singlemalloc = PETSC_FALSE; 3580 c->free_a = PETSC_TRUE; 3581 c->free_ij = PETSC_FALSE; 3582 c->parent = A; 3583 C->preallocated = PETSC_TRUE; 3584 C->assembled = PETSC_TRUE; 3585 3586 PetscCall(PetscObjectReference((PetscObject)A)); 3587 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3588 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3589 } else { 3590 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3591 3592 c->singlemalloc = PETSC_TRUE; 3593 c->free_a = PETSC_TRUE; 3594 c->free_ij = PETSC_TRUE; 3595 3596 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3597 if (mbs > 0) { 3598 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3599 if (cpvalues == MAT_COPY_VALUES) { 3600 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3601 } else { 3602 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3603 } 3604 } 3605 C->preallocated = PETSC_TRUE; 3606 C->assembled = PETSC_TRUE; 3607 } 3608 } 3609 3610 c->roworiented = a->roworiented; 3611 c->nonew = a->nonew; 3612 3613 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3614 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3615 3616 c->bs2 = a->bs2; 3617 c->mbs = a->mbs; 3618 c->nbs = a->nbs; 3619 3620 if (a->diag) { 3621 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3622 c->diag = a->diag; 3623 c->free_diag = PETSC_FALSE; 3624 } else { 3625 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3626 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3627 c->free_diag = PETSC_TRUE; 3628 } 3629 } else c->diag = NULL; 3630 3631 c->nz = a->nz; 3632 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3633 c->solve_work = NULL; 3634 c->mult_work = NULL; 3635 c->sor_workt = NULL; 3636 c->sor_work = NULL; 3637 3638 c->compressedrow.use = a->compressedrow.use; 3639 c->compressedrow.nrows = a->compressedrow.nrows; 3640 if (a->compressedrow.use) { 3641 i = a->compressedrow.nrows; 3642 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3643 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3644 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3645 } else { 3646 c->compressedrow.use = PETSC_FALSE; 3647 c->compressedrow.i = NULL; 3648 c->compressedrow.rindex = NULL; 3649 } 3650 c->nonzerorowcnt = a->nonzerorowcnt; 3651 C->nonzerostate = A->nonzerostate; 3652 3653 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3654 PetscFunctionReturn(PETSC_SUCCESS); 3655 } 3656 3657 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3658 { 3659 PetscFunctionBegin; 3660 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3661 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3662 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3663 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3664 PetscFunctionReturn(PETSC_SUCCESS); 3665 } 3666 3667 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3668 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3669 { 3670 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3671 PetscInt *rowidxs, *colidxs; 3672 PetscScalar *matvals; 3673 3674 PetscFunctionBegin; 3675 PetscCall(PetscViewerSetUp(viewer)); 3676 3677 /* read matrix header */ 3678 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3679 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3680 M = header[1]; 3681 N = header[2]; 3682 nz = header[3]; 3683 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3684 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3685 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3686 3687 /* set block sizes from the viewer's .info file */ 3688 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3689 /* set local and global sizes if not set already */ 3690 if (mat->rmap->n < 0) mat->rmap->n = M; 3691 if (mat->cmap->n < 0) mat->cmap->n = N; 3692 if (mat->rmap->N < 0) mat->rmap->N = M; 3693 if (mat->cmap->N < 0) mat->cmap->N = N; 3694 PetscCall(PetscLayoutSetUp(mat->rmap)); 3695 PetscCall(PetscLayoutSetUp(mat->cmap)); 3696 3697 /* check if the matrix sizes are correct */ 3698 PetscCall(MatGetSize(mat, &rows, &cols)); 3699 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3700 PetscCall(MatGetBlockSize(mat, &bs)); 3701 PetscCall(MatGetLocalSize(mat, &m, &n)); 3702 mbs = m / bs; 3703 nbs = n / bs; 3704 3705 /* read in row lengths, column indices and nonzero values */ 3706 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3707 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3708 rowidxs[0] = 0; 3709 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3710 sum = rowidxs[m]; 3711 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3712 3713 /* read in column indices and nonzero values */ 3714 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3715 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3716 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3717 3718 { /* preallocate matrix storage */ 3719 PetscBT bt; /* helper bit set to count nonzeros */ 3720 PetscInt *nnz; 3721 PetscBool sbaij; 3722 3723 PetscCall(PetscBTCreate(nbs, &bt)); 3724 PetscCall(PetscCalloc1(mbs, &nnz)); 3725 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3726 for (i = 0; i < mbs; i++) { 3727 PetscCall(PetscBTMemzero(nbs, bt)); 3728 for (k = 0; k < bs; k++) { 3729 PetscInt row = bs * i + k; 3730 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3731 PetscInt col = colidxs[j]; 3732 if (!sbaij || col >= row) 3733 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3734 } 3735 } 3736 } 3737 PetscCall(PetscBTDestroy(&bt)); 3738 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3739 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3740 PetscCall(PetscFree(nnz)); 3741 } 3742 3743 /* store matrix values */ 3744 for (i = 0; i < m; i++) { 3745 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3746 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3747 } 3748 3749 PetscCall(PetscFree(rowidxs)); 3750 PetscCall(PetscFree2(colidxs, matvals)); 3751 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3752 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3753 PetscFunctionReturn(PETSC_SUCCESS); 3754 } 3755 3756 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3757 { 3758 PetscBool isbinary; 3759 3760 PetscFunctionBegin; 3761 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3762 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3763 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3764 PetscFunctionReturn(PETSC_SUCCESS); 3765 } 3766 3767 /*@C 3768 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3769 compressed row) format. For good matrix assembly performance the 3770 user should preallocate the matrix storage by setting the parameter `nz` 3771 (or the array `nnz`). 3772 3773 Collective 3774 3775 Input Parameters: 3776 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3777 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3778 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3779 . m - number of rows 3780 . n - number of columns 3781 . nz - number of nonzero blocks per block row (same for all rows) 3782 - nnz - array containing the number of nonzero blocks in the various block rows 3783 (possibly different for each block row) or `NULL` 3784 3785 Output Parameter: 3786 . A - the matrix 3787 3788 Options Database Keys: 3789 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3790 - -mat_block_size - size of the blocks to use 3791 3792 Level: intermediate 3793 3794 Notes: 3795 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3796 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3797 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3798 3799 The number of rows and columns must be divisible by blocksize. 3800 3801 If the `nnz` parameter is given then the `nz` parameter is ignored 3802 3803 A nonzero block is any block that as 1 or more nonzeros in it 3804 3805 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3806 storage. That is, the stored row and column indices can begin at 3807 either one (as in Fortran) or zero. 3808 3809 Specify the preallocated storage with either `nz` or `nnz` (not both). 3810 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3811 allocation. See [Sparse Matrices](sec_matsparse) for details. 3812 matrices. 3813 3814 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3815 @*/ 3816 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3817 { 3818 PetscFunctionBegin; 3819 PetscCall(MatCreate(comm, A)); 3820 PetscCall(MatSetSizes(*A, m, n, m, n)); 3821 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3822 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3823 PetscFunctionReturn(PETSC_SUCCESS); 3824 } 3825 3826 /*@C 3827 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3828 per row in the matrix. For good matrix assembly performance the 3829 user should preallocate the matrix storage by setting the parameter `nz` 3830 (or the array `nnz`). 3831 3832 Collective 3833 3834 Input Parameters: 3835 + B - the matrix 3836 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3837 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3838 . nz - number of block nonzeros per block row (same for all rows) 3839 - nnz - array containing the number of block nonzeros in the various block rows 3840 (possibly different for each block row) or `NULL` 3841 3842 Options Database Keys: 3843 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3844 - -mat_block_size - size of the blocks to use 3845 3846 Level: intermediate 3847 3848 Notes: 3849 If the `nnz` parameter is given then the `nz` parameter is ignored 3850 3851 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3852 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3853 You can also run with the option `-info` and look for messages with the string 3854 malloc in them to see if additional memory allocation was needed. 3855 3856 The `MATSEQBAIJ` format is fully compatible with standard Fortran 3857 storage. That is, the stored row and column indices can begin at 3858 either one (as in Fortran) or zero. 3859 3860 Specify the preallocated storage with either nz or nnz (not both). 3861 Set `nz` = `PETSC_DEFAULT` and `nnz` = `NULL` for PETSc to control dynamic memory 3862 allocation. See [Sparse Matrices](sec_matsparse) for details. 3863 3864 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3865 @*/ 3866 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3867 { 3868 PetscFunctionBegin; 3869 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3870 PetscValidType(B, 1); 3871 PetscValidLogicalCollectiveInt(B, bs, 2); 3872 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3873 PetscFunctionReturn(PETSC_SUCCESS); 3874 } 3875 3876 /*@C 3877 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3878 3879 Collective 3880 3881 Input Parameters: 3882 + B - the matrix 3883 . bs - the blocksize 3884 . i - the indices into `j` for the start of each local row (starts with zero) 3885 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3886 - v - optional values in the matrix 3887 3888 Level: advanced 3889 3890 Notes: 3891 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3892 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3893 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3894 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3895 block column and the second index is over columns within a block. 3896 3897 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3898 3899 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3900 @*/ 3901 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3902 { 3903 PetscFunctionBegin; 3904 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3905 PetscValidType(B, 1); 3906 PetscValidLogicalCollectiveInt(B, bs, 2); 3907 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3908 PetscFunctionReturn(PETSC_SUCCESS); 3909 } 3910 3911 /*@ 3912 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3913 3914 Collective 3915 3916 Input Parameters: 3917 + comm - must be an MPI communicator of size 1 3918 . bs - size of block 3919 . m - number of rows 3920 . n - number of columns 3921 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3922 . j - column indices 3923 - a - matrix values 3924 3925 Output Parameter: 3926 . mat - the matrix 3927 3928 Level: advanced 3929 3930 Notes: 3931 The `i`, `j`, and `a` arrays are not copied by this routine, the user must free these arrays 3932 once the matrix is destroyed 3933 3934 You cannot set new nonzero locations into this matrix, that will generate an error. 3935 3936 The `i` and `j` indices are 0 based 3937 3938 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3939 3940 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3941 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3942 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3943 with column-major ordering within blocks. 3944 3945 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3946 @*/ 3947 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3948 { 3949 Mat_SeqBAIJ *baij; 3950 3951 PetscFunctionBegin; 3952 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3953 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3954 3955 PetscCall(MatCreate(comm, mat)); 3956 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3957 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3958 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3959 baij = (Mat_SeqBAIJ *)(*mat)->data; 3960 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3961 3962 baij->i = i; 3963 baij->j = j; 3964 baij->a = a; 3965 3966 baij->singlemalloc = PETSC_FALSE; 3967 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3968 baij->free_a = PETSC_FALSE; 3969 baij->free_ij = PETSC_FALSE; 3970 baij->free_imax_ilen = PETSC_TRUE; 3971 3972 for (PetscInt ii = 0; ii < m; ii++) { 3973 const PetscInt row_len = i[ii + 1] - i[ii]; 3974 3975 baij->ilen[ii] = baij->imax[ii] = row_len; 3976 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3977 } 3978 if (PetscDefined(USE_DEBUG)) { 3979 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3980 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3981 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3982 } 3983 } 3984 3985 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3986 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3987 PetscFunctionReturn(PETSC_SUCCESS); 3988 } 3989 3990 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3991 { 3992 PetscFunctionBegin; 3993 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3994 PetscFunctionReturn(PETSC_SUCCESS); 3995 } 3996