1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 /* defines MatSetValues_Seq_Hash(), MatAssemblyEnd_Seq_Hash(), MatSetUp_Seq_Hash() */ 12 #define TYPE BAIJ 13 #define TYPE_BS 14 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 15 #undef TYPE_BS 16 #define TYPE_BS _BS 17 #define TYPE_BS_ON 18 #include "../src/mat/impls/aij/seq/seqhashmatsetvalues.h" 19 #undef TYPE_BS 20 #include "../src/mat/impls/aij/seq/seqhashmat.h" 21 #undef TYPE 22 #undef TYPE_BS_ON 23 24 #if defined(PETSC_HAVE_HYPRE) 25 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 26 #endif 27 28 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE) 29 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *); 30 #endif 31 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 32 33 static PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions) 34 { 35 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data; 36 PetscInt m, n, ib, jb, bs = A->rmap->bs; 37 MatScalar *a_val = a_aij->a; 38 39 PetscFunctionBegin; 40 PetscCall(MatGetSize(A, &m, &n)); 41 PetscCall(PetscArrayzero(reductions, n)); 42 if (type == NORM_2) { 43 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 44 for (jb = 0; jb < bs; jb++) { 45 for (ib = 0; ib < bs; ib++) { 46 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 47 a_val++; 48 } 49 } 50 } 51 } else if (type == NORM_1) { 52 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 53 for (jb = 0; jb < bs; jb++) { 54 for (ib = 0; ib < bs; ib++) { 55 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 56 a_val++; 57 } 58 } 59 } 60 } else if (type == NORM_INFINITY) { 61 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 62 for (jb = 0; jb < bs; jb++) { 63 for (ib = 0; ib < bs; ib++) { 64 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 65 reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]); 66 a_val++; 67 } 68 } 69 } 70 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 71 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 72 for (jb = 0; jb < bs; jb++) { 73 for (ib = 0; ib < bs; ib++) { 74 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 75 a_val++; 76 } 77 } 78 } 79 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 80 for (PetscInt i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) { 81 for (jb = 0; jb < bs; jb++) { 82 for (ib = 0; ib < bs; ib++) { 83 reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 84 a_val++; 85 } 86 } 87 } 88 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 89 if (type == NORM_2) { 90 for (PetscInt i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 91 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 92 for (PetscInt i = 0; i < n; i++) reductions[i] /= m; 93 } 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values) 98 { 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 100 PetscInt *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots; 101 MatScalar *v = a->a, *odiag, *diag, work[25], *v_work; 102 PetscReal shift = 0.0; 103 PetscBool allowzeropivot, zeropivotdetected = PETSC_FALSE; 104 105 PetscFunctionBegin; 106 allowzeropivot = PetscNot(A->erroriffailure); 107 108 if (a->idiagvalid) { 109 if (values) *values = a->idiag; 110 PetscFunctionReturn(PETSC_SUCCESS); 111 } 112 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 113 diag_offset = a->diag; 114 if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); } 115 diag = a->idiag; 116 if (values) *values = a->idiag; 117 /* factor and invert each block */ 118 switch (bs) { 119 case 1: 120 for (i = 0; i < mbs; i++) { 121 odiag = v + 1 * diag_offset[i]; 122 diag[0] = odiag[0]; 123 124 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 125 if (allowzeropivot) { 126 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 127 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 128 A->factorerror_zeropivot_row = i; 129 PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i)); 130 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON); 131 } 132 133 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 134 diag += 1; 135 } 136 break; 137 case 2: 138 for (i = 0; i < mbs; i++) { 139 odiag = v + 4 * diag_offset[i]; 140 diag[0] = odiag[0]; 141 diag[1] = odiag[1]; 142 diag[2] = odiag[2]; 143 diag[3] = odiag[3]; 144 PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected)); 145 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 146 diag += 4; 147 } 148 break; 149 case 3: 150 for (i = 0; i < mbs; i++) { 151 odiag = v + 9 * diag_offset[i]; 152 diag[0] = odiag[0]; 153 diag[1] = odiag[1]; 154 diag[2] = odiag[2]; 155 diag[3] = odiag[3]; 156 diag[4] = odiag[4]; 157 diag[5] = odiag[5]; 158 diag[6] = odiag[6]; 159 diag[7] = odiag[7]; 160 diag[8] = odiag[8]; 161 PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected)); 162 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 163 diag += 9; 164 } 165 break; 166 case 4: 167 for (i = 0; i < mbs; i++) { 168 odiag = v + 16 * diag_offset[i]; 169 PetscCall(PetscArraycpy(diag, odiag, 16)); 170 PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected)); 171 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 172 diag += 16; 173 } 174 break; 175 case 5: 176 for (i = 0; i < mbs; i++) { 177 odiag = v + 25 * diag_offset[i]; 178 PetscCall(PetscArraycpy(diag, odiag, 25)); 179 PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected)); 180 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 181 diag += 25; 182 } 183 break; 184 case 6: 185 for (i = 0; i < mbs; i++) { 186 odiag = v + 36 * diag_offset[i]; 187 PetscCall(PetscArraycpy(diag, odiag, 36)); 188 PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected)); 189 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 190 diag += 36; 191 } 192 break; 193 case 7: 194 for (i = 0; i < mbs; i++) { 195 odiag = v + 49 * diag_offset[i]; 196 PetscCall(PetscArraycpy(diag, odiag, 49)); 197 PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected)); 198 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 199 diag += 49; 200 } 201 break; 202 default: 203 PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots)); 204 for (i = 0; i < mbs; i++) { 205 odiag = v + bs2 * diag_offset[i]; 206 PetscCall(PetscArraycpy(diag, odiag, bs2)); 207 PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected)); 208 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 209 diag += bs2; 210 } 211 PetscCall(PetscFree2(v_work, v_pivots)); 212 } 213 a->idiagvalid = PETSC_TRUE; 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 218 { 219 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 220 PetscScalar *x, *work, *w, *workt, *t; 221 const MatScalar *v, *aa = a->a, *idiag; 222 const PetscScalar *b, *xb; 223 PetscScalar s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */ 224 PetscInt m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it; 225 const PetscInt *diag, *ai = a->i, *aj = a->j, *vi; 226 227 PetscFunctionBegin; 228 its = its * lits; 229 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 230 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 231 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 232 PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor"); 233 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 234 235 if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL)); 236 237 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 238 diag = a->diag; 239 idiag = a->idiag; 240 k = PetscMax(A->rmap->n, A->cmap->n); 241 if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work)); 242 if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt)); 243 if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work)); 244 work = a->mult_work; 245 t = a->sor_workt; 246 w = a->sor_work; 247 248 PetscCall(VecGetArray(xx, &x)); 249 PetscCall(VecGetArrayRead(bb, &b)); 250 251 if (flag & SOR_ZERO_INITIAL_GUESS) { 252 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 253 switch (bs) { 254 case 1: 255 PetscKernel_v_gets_A_times_w_1(x, idiag, b); 256 t[0] = b[0]; 257 i2 = 1; 258 idiag += 1; 259 for (i = 1; i < m; i++) { 260 v = aa + ai[i]; 261 vi = aj + ai[i]; 262 nz = diag[i] - ai[i]; 263 s[0] = b[i2]; 264 for (j = 0; j < nz; j++) { 265 xw[0] = x[vi[j]]; 266 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 267 } 268 t[i2] = s[0]; 269 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 270 x[i2] = xw[0]; 271 idiag += 1; 272 i2 += 1; 273 } 274 break; 275 case 2: 276 PetscKernel_v_gets_A_times_w_2(x, idiag, b); 277 t[0] = b[0]; 278 t[1] = b[1]; 279 i2 = 2; 280 idiag += 4; 281 for (i = 1; i < m; i++) { 282 v = aa + 4 * ai[i]; 283 vi = aj + ai[i]; 284 nz = diag[i] - ai[i]; 285 s[0] = b[i2]; 286 s[1] = b[i2 + 1]; 287 for (j = 0; j < nz; j++) { 288 idx = 2 * vi[j]; 289 it = 4 * j; 290 xw[0] = x[idx]; 291 xw[1] = x[1 + idx]; 292 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 293 } 294 t[i2] = s[0]; 295 t[i2 + 1] = s[1]; 296 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 297 x[i2] = xw[0]; 298 x[i2 + 1] = xw[1]; 299 idiag += 4; 300 i2 += 2; 301 } 302 break; 303 case 3: 304 PetscKernel_v_gets_A_times_w_3(x, idiag, b); 305 t[0] = b[0]; 306 t[1] = b[1]; 307 t[2] = b[2]; 308 i2 = 3; 309 idiag += 9; 310 for (i = 1; i < m; i++) { 311 v = aa + 9 * ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; 315 s[1] = b[i2 + 1]; 316 s[2] = b[i2 + 2]; 317 while (nz--) { 318 idx = 3 * (*vi++); 319 xw[0] = x[idx]; 320 xw[1] = x[1 + idx]; 321 xw[2] = x[2 + idx]; 322 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 323 v += 9; 324 } 325 t[i2] = s[0]; 326 t[i2 + 1] = s[1]; 327 t[i2 + 2] = s[2]; 328 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 329 x[i2] = xw[0]; 330 x[i2 + 1] = xw[1]; 331 x[i2 + 2] = xw[2]; 332 idiag += 9; 333 i2 += 3; 334 } 335 break; 336 case 4: 337 PetscKernel_v_gets_A_times_w_4(x, idiag, b); 338 t[0] = b[0]; 339 t[1] = b[1]; 340 t[2] = b[2]; 341 t[3] = b[3]; 342 i2 = 4; 343 idiag += 16; 344 for (i = 1; i < m; i++) { 345 v = aa + 16 * ai[i]; 346 vi = aj + ai[i]; 347 nz = diag[i] - ai[i]; 348 s[0] = b[i2]; 349 s[1] = b[i2 + 1]; 350 s[2] = b[i2 + 2]; 351 s[3] = b[i2 + 3]; 352 while (nz--) { 353 idx = 4 * (*vi++); 354 xw[0] = x[idx]; 355 xw[1] = x[1 + idx]; 356 xw[2] = x[2 + idx]; 357 xw[3] = x[3 + idx]; 358 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 359 v += 16; 360 } 361 t[i2] = s[0]; 362 t[i2 + 1] = s[1]; 363 t[i2 + 2] = s[2]; 364 t[i2 + 3] = s[3]; 365 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 366 x[i2] = xw[0]; 367 x[i2 + 1] = xw[1]; 368 x[i2 + 2] = xw[2]; 369 x[i2 + 3] = xw[3]; 370 idiag += 16; 371 i2 += 4; 372 } 373 break; 374 case 5: 375 PetscKernel_v_gets_A_times_w_5(x, idiag, b); 376 t[0] = b[0]; 377 t[1] = b[1]; 378 t[2] = b[2]; 379 t[3] = b[3]; 380 t[4] = b[4]; 381 i2 = 5; 382 idiag += 25; 383 for (i = 1; i < m; i++) { 384 v = aa + 25 * ai[i]; 385 vi = aj + ai[i]; 386 nz = diag[i] - ai[i]; 387 s[0] = b[i2]; 388 s[1] = b[i2 + 1]; 389 s[2] = b[i2 + 2]; 390 s[3] = b[i2 + 3]; 391 s[4] = b[i2 + 4]; 392 while (nz--) { 393 idx = 5 * (*vi++); 394 xw[0] = x[idx]; 395 xw[1] = x[1 + idx]; 396 xw[2] = x[2 + idx]; 397 xw[3] = x[3 + idx]; 398 xw[4] = x[4 + idx]; 399 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 400 v += 25; 401 } 402 t[i2] = s[0]; 403 t[i2 + 1] = s[1]; 404 t[i2 + 2] = s[2]; 405 t[i2 + 3] = s[3]; 406 t[i2 + 4] = s[4]; 407 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 408 x[i2] = xw[0]; 409 x[i2 + 1] = xw[1]; 410 x[i2 + 2] = xw[2]; 411 x[i2 + 3] = xw[3]; 412 x[i2 + 4] = xw[4]; 413 idiag += 25; 414 i2 += 5; 415 } 416 break; 417 case 6: 418 PetscKernel_v_gets_A_times_w_6(x, idiag, b); 419 t[0] = b[0]; 420 t[1] = b[1]; 421 t[2] = b[2]; 422 t[3] = b[3]; 423 t[4] = b[4]; 424 t[5] = b[5]; 425 i2 = 6; 426 idiag += 36; 427 for (i = 1; i < m; i++) { 428 v = aa + 36 * ai[i]; 429 vi = aj + ai[i]; 430 nz = diag[i] - ai[i]; 431 s[0] = b[i2]; 432 s[1] = b[i2 + 1]; 433 s[2] = b[i2 + 2]; 434 s[3] = b[i2 + 3]; 435 s[4] = b[i2 + 4]; 436 s[5] = b[i2 + 5]; 437 while (nz--) { 438 idx = 6 * (*vi++); 439 xw[0] = x[idx]; 440 xw[1] = x[1 + idx]; 441 xw[2] = x[2 + idx]; 442 xw[3] = x[3 + idx]; 443 xw[4] = x[4 + idx]; 444 xw[5] = x[5 + idx]; 445 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 446 v += 36; 447 } 448 t[i2] = s[0]; 449 t[i2 + 1] = s[1]; 450 t[i2 + 2] = s[2]; 451 t[i2 + 3] = s[3]; 452 t[i2 + 4] = s[4]; 453 t[i2 + 5] = s[5]; 454 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 455 x[i2] = xw[0]; 456 x[i2 + 1] = xw[1]; 457 x[i2 + 2] = xw[2]; 458 x[i2 + 3] = xw[3]; 459 x[i2 + 4] = xw[4]; 460 x[i2 + 5] = xw[5]; 461 idiag += 36; 462 i2 += 6; 463 } 464 break; 465 case 7: 466 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 467 t[0] = b[0]; 468 t[1] = b[1]; 469 t[2] = b[2]; 470 t[3] = b[3]; 471 t[4] = b[4]; 472 t[5] = b[5]; 473 t[6] = b[6]; 474 i2 = 7; 475 idiag += 49; 476 for (i = 1; i < m; i++) { 477 v = aa + 49 * ai[i]; 478 vi = aj + ai[i]; 479 nz = diag[i] - ai[i]; 480 s[0] = b[i2]; 481 s[1] = b[i2 + 1]; 482 s[2] = b[i2 + 2]; 483 s[3] = b[i2 + 3]; 484 s[4] = b[i2 + 4]; 485 s[5] = b[i2 + 5]; 486 s[6] = b[i2 + 6]; 487 while (nz--) { 488 idx = 7 * (*vi++); 489 xw[0] = x[idx]; 490 xw[1] = x[1 + idx]; 491 xw[2] = x[2 + idx]; 492 xw[3] = x[3 + idx]; 493 xw[4] = x[4 + idx]; 494 xw[5] = x[5 + idx]; 495 xw[6] = x[6 + idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 497 v += 49; 498 } 499 t[i2] = s[0]; 500 t[i2 + 1] = s[1]; 501 t[i2 + 2] = s[2]; 502 t[i2 + 3] = s[3]; 503 t[i2 + 4] = s[4]; 504 t[i2 + 5] = s[5]; 505 t[i2 + 6] = s[6]; 506 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 507 x[i2] = xw[0]; 508 x[i2 + 1] = xw[1]; 509 x[i2 + 2] = xw[2]; 510 x[i2 + 3] = xw[3]; 511 x[i2 + 4] = xw[4]; 512 x[i2 + 5] = xw[5]; 513 x[i2 + 6] = xw[6]; 514 idiag += 49; 515 i2 += 7; 516 } 517 break; 518 default: 519 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); 520 PetscCall(PetscArraycpy(t, b, bs)); 521 i2 = bs; 522 idiag += bs2; 523 for (i = 1; i < m; i++) { 524 v = aa + bs2 * ai[i]; 525 vi = aj + ai[i]; 526 nz = diag[i] - ai[i]; 527 528 PetscCall(PetscArraycpy(w, b + i2, bs)); 529 /* copy all rows of x that are needed into contiguous space */ 530 workt = work; 531 for (j = 0; j < nz; j++) { 532 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 533 workt += bs; 534 } 535 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 536 PetscCall(PetscArraycpy(t + i2, w, bs)); 537 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 538 539 idiag += bs2; 540 i2 += bs; 541 } 542 break; 543 } 544 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 545 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 546 xb = t; 547 } else xb = b; 548 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 549 idiag = a->idiag + bs2 * (a->mbs - 1); 550 i2 = bs * (m - 1); 551 switch (bs) { 552 case 1: 553 s[0] = xb[i2]; 554 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 555 x[i2] = xw[0]; 556 i2 -= 1; 557 for (i = m - 2; i >= 0; i--) { 558 v = aa + (diag[i] + 1); 559 vi = aj + diag[i] + 1; 560 nz = ai[i + 1] - diag[i] - 1; 561 s[0] = xb[i2]; 562 for (j = 0; j < nz; j++) { 563 xw[0] = x[vi[j]]; 564 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 565 } 566 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 567 x[i2] = xw[0]; 568 idiag -= 1; 569 i2 -= 1; 570 } 571 break; 572 case 2: 573 s[0] = xb[i2]; 574 s[1] = xb[i2 + 1]; 575 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 576 x[i2] = xw[0]; 577 x[i2 + 1] = xw[1]; 578 i2 -= 2; 579 idiag -= 4; 580 for (i = m - 2; i >= 0; i--) { 581 v = aa + 4 * (diag[i] + 1); 582 vi = aj + diag[i] + 1; 583 nz = ai[i + 1] - diag[i] - 1; 584 s[0] = xb[i2]; 585 s[1] = xb[i2 + 1]; 586 for (j = 0; j < nz; j++) { 587 idx = 2 * vi[j]; 588 it = 4 * j; 589 xw[0] = x[idx]; 590 xw[1] = x[1 + idx]; 591 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 592 } 593 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 594 x[i2] = xw[0]; 595 x[i2 + 1] = xw[1]; 596 idiag -= 4; 597 i2 -= 2; 598 } 599 break; 600 case 3: 601 s[0] = xb[i2]; 602 s[1] = xb[i2 + 1]; 603 s[2] = xb[i2 + 2]; 604 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 605 x[i2] = xw[0]; 606 x[i2 + 1] = xw[1]; 607 x[i2 + 2] = xw[2]; 608 i2 -= 3; 609 idiag -= 9; 610 for (i = m - 2; i >= 0; i--) { 611 v = aa + 9 * (diag[i] + 1); 612 vi = aj + diag[i] + 1; 613 nz = ai[i + 1] - diag[i] - 1; 614 s[0] = xb[i2]; 615 s[1] = xb[i2 + 1]; 616 s[2] = xb[i2 + 2]; 617 while (nz--) { 618 idx = 3 * (*vi++); 619 xw[0] = x[idx]; 620 xw[1] = x[1 + idx]; 621 xw[2] = x[2 + idx]; 622 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 623 v += 9; 624 } 625 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 626 x[i2] = xw[0]; 627 x[i2 + 1] = xw[1]; 628 x[i2 + 2] = xw[2]; 629 idiag -= 9; 630 i2 -= 3; 631 } 632 break; 633 case 4: 634 s[0] = xb[i2]; 635 s[1] = xb[i2 + 1]; 636 s[2] = xb[i2 + 2]; 637 s[3] = xb[i2 + 3]; 638 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 639 x[i2] = xw[0]; 640 x[i2 + 1] = xw[1]; 641 x[i2 + 2] = xw[2]; 642 x[i2 + 3] = xw[3]; 643 i2 -= 4; 644 idiag -= 16; 645 for (i = m - 2; i >= 0; i--) { 646 v = aa + 16 * (diag[i] + 1); 647 vi = aj + diag[i] + 1; 648 nz = ai[i + 1] - diag[i] - 1; 649 s[0] = xb[i2]; 650 s[1] = xb[i2 + 1]; 651 s[2] = xb[i2 + 2]; 652 s[3] = xb[i2 + 3]; 653 while (nz--) { 654 idx = 4 * (*vi++); 655 xw[0] = x[idx]; 656 xw[1] = x[1 + idx]; 657 xw[2] = x[2 + idx]; 658 xw[3] = x[3 + idx]; 659 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 660 v += 16; 661 } 662 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 663 x[i2] = xw[0]; 664 x[i2 + 1] = xw[1]; 665 x[i2 + 2] = xw[2]; 666 x[i2 + 3] = xw[3]; 667 idiag -= 16; 668 i2 -= 4; 669 } 670 break; 671 case 5: 672 s[0] = xb[i2]; 673 s[1] = xb[i2 + 1]; 674 s[2] = xb[i2 + 2]; 675 s[3] = xb[i2 + 3]; 676 s[4] = xb[i2 + 4]; 677 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 678 x[i2] = xw[0]; 679 x[i2 + 1] = xw[1]; 680 x[i2 + 2] = xw[2]; 681 x[i2 + 3] = xw[3]; 682 x[i2 + 4] = xw[4]; 683 i2 -= 5; 684 idiag -= 25; 685 for (i = m - 2; i >= 0; i--) { 686 v = aa + 25 * (diag[i] + 1); 687 vi = aj + diag[i] + 1; 688 nz = ai[i + 1] - diag[i] - 1; 689 s[0] = xb[i2]; 690 s[1] = xb[i2 + 1]; 691 s[2] = xb[i2 + 2]; 692 s[3] = xb[i2 + 3]; 693 s[4] = xb[i2 + 4]; 694 while (nz--) { 695 idx = 5 * (*vi++); 696 xw[0] = x[idx]; 697 xw[1] = x[1 + idx]; 698 xw[2] = x[2 + idx]; 699 xw[3] = x[3 + idx]; 700 xw[4] = x[4 + idx]; 701 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 702 v += 25; 703 } 704 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 705 x[i2] = xw[0]; 706 x[i2 + 1] = xw[1]; 707 x[i2 + 2] = xw[2]; 708 x[i2 + 3] = xw[3]; 709 x[i2 + 4] = xw[4]; 710 idiag -= 25; 711 i2 -= 5; 712 } 713 break; 714 case 6: 715 s[0] = xb[i2]; 716 s[1] = xb[i2 + 1]; 717 s[2] = xb[i2 + 2]; 718 s[3] = xb[i2 + 3]; 719 s[4] = xb[i2 + 4]; 720 s[5] = xb[i2 + 5]; 721 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 722 x[i2] = xw[0]; 723 x[i2 + 1] = xw[1]; 724 x[i2 + 2] = xw[2]; 725 x[i2 + 3] = xw[3]; 726 x[i2 + 4] = xw[4]; 727 x[i2 + 5] = xw[5]; 728 i2 -= 6; 729 idiag -= 36; 730 for (i = m - 2; i >= 0; i--) { 731 v = aa + 36 * (diag[i] + 1); 732 vi = aj + diag[i] + 1; 733 nz = ai[i + 1] - diag[i] - 1; 734 s[0] = xb[i2]; 735 s[1] = xb[i2 + 1]; 736 s[2] = xb[i2 + 2]; 737 s[3] = xb[i2 + 3]; 738 s[4] = xb[i2 + 4]; 739 s[5] = xb[i2 + 5]; 740 while (nz--) { 741 idx = 6 * (*vi++); 742 xw[0] = x[idx]; 743 xw[1] = x[1 + idx]; 744 xw[2] = x[2 + idx]; 745 xw[3] = x[3 + idx]; 746 xw[4] = x[4 + idx]; 747 xw[5] = x[5 + idx]; 748 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 749 v += 36; 750 } 751 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 752 x[i2] = xw[0]; 753 x[i2 + 1] = xw[1]; 754 x[i2 + 2] = xw[2]; 755 x[i2 + 3] = xw[3]; 756 x[i2 + 4] = xw[4]; 757 x[i2 + 5] = xw[5]; 758 idiag -= 36; 759 i2 -= 6; 760 } 761 break; 762 case 7: 763 s[0] = xb[i2]; 764 s[1] = xb[i2 + 1]; 765 s[2] = xb[i2 + 2]; 766 s[3] = xb[i2 + 3]; 767 s[4] = xb[i2 + 4]; 768 s[5] = xb[i2 + 5]; 769 s[6] = xb[i2 + 6]; 770 PetscKernel_v_gets_A_times_w_7(x, idiag, b); 771 x[i2] = xw[0]; 772 x[i2 + 1] = xw[1]; 773 x[i2 + 2] = xw[2]; 774 x[i2 + 3] = xw[3]; 775 x[i2 + 4] = xw[4]; 776 x[i2 + 5] = xw[5]; 777 x[i2 + 6] = xw[6]; 778 i2 -= 7; 779 idiag -= 49; 780 for (i = m - 2; i >= 0; i--) { 781 v = aa + 49 * (diag[i] + 1); 782 vi = aj + diag[i] + 1; 783 nz = ai[i + 1] - diag[i] - 1; 784 s[0] = xb[i2]; 785 s[1] = xb[i2 + 1]; 786 s[2] = xb[i2 + 2]; 787 s[3] = xb[i2 + 3]; 788 s[4] = xb[i2 + 4]; 789 s[5] = xb[i2 + 5]; 790 s[6] = xb[i2 + 6]; 791 while (nz--) { 792 idx = 7 * (*vi++); 793 xw[0] = x[idx]; 794 xw[1] = x[1 + idx]; 795 xw[2] = x[2 + idx]; 796 xw[3] = x[3 + idx]; 797 xw[4] = x[4 + idx]; 798 xw[5] = x[5 + idx]; 799 xw[6] = x[6 + idx]; 800 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 801 v += 49; 802 } 803 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 804 x[i2] = xw[0]; 805 x[i2 + 1] = xw[1]; 806 x[i2 + 2] = xw[2]; 807 x[i2 + 3] = xw[3]; 808 x[i2 + 4] = xw[4]; 809 x[i2 + 5] = xw[5]; 810 x[i2 + 6] = xw[6]; 811 idiag -= 49; 812 i2 -= 7; 813 } 814 break; 815 default: 816 PetscCall(PetscArraycpy(w, xb + i2, bs)); 817 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 818 i2 -= bs; 819 idiag -= bs2; 820 for (i = m - 2; i >= 0; i--) { 821 v = aa + bs2 * (diag[i] + 1); 822 vi = aj + diag[i] + 1; 823 nz = ai[i + 1] - diag[i] - 1; 824 825 PetscCall(PetscArraycpy(w, xb + i2, bs)); 826 /* copy all rows of x that are needed into contiguous space */ 827 workt = work; 828 for (j = 0; j < nz; j++) { 829 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 830 workt += bs; 831 } 832 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 833 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 834 835 idiag -= bs2; 836 i2 -= bs; 837 } 838 break; 839 } 840 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 841 } 842 its--; 843 } 844 while (its--) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 idiag = a->idiag; 847 i2 = 0; 848 switch (bs) { 849 case 1: 850 for (i = 0; i < m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = ai[i + 1] - ai[i]; 854 s[0] = b[i2]; 855 for (j = 0; j < nz; j++) { 856 xw[0] = x[vi[j]]; 857 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 858 } 859 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 860 x[i2] += xw[0]; 861 idiag += 1; 862 i2 += 1; 863 } 864 break; 865 case 2: 866 for (i = 0; i < m; i++) { 867 v = aa + 4 * ai[i]; 868 vi = aj + ai[i]; 869 nz = ai[i + 1] - ai[i]; 870 s[0] = b[i2]; 871 s[1] = b[i2 + 1]; 872 for (j = 0; j < nz; j++) { 873 idx = 2 * vi[j]; 874 it = 4 * j; 875 xw[0] = x[idx]; 876 xw[1] = x[1 + idx]; 877 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 878 } 879 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 880 x[i2] += xw[0]; 881 x[i2 + 1] += xw[1]; 882 idiag += 4; 883 i2 += 2; 884 } 885 break; 886 case 3: 887 for (i = 0; i < m; i++) { 888 v = aa + 9 * ai[i]; 889 vi = aj + ai[i]; 890 nz = ai[i + 1] - ai[i]; 891 s[0] = b[i2]; 892 s[1] = b[i2 + 1]; 893 s[2] = b[i2 + 2]; 894 while (nz--) { 895 idx = 3 * (*vi++); 896 xw[0] = x[idx]; 897 xw[1] = x[1 + idx]; 898 xw[2] = x[2 + idx]; 899 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 900 v += 9; 901 } 902 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 903 x[i2] += xw[0]; 904 x[i2 + 1] += xw[1]; 905 x[i2 + 2] += xw[2]; 906 idiag += 9; 907 i2 += 3; 908 } 909 break; 910 case 4: 911 for (i = 0; i < m; i++) { 912 v = aa + 16 * ai[i]; 913 vi = aj + ai[i]; 914 nz = ai[i + 1] - ai[i]; 915 s[0] = b[i2]; 916 s[1] = b[i2 + 1]; 917 s[2] = b[i2 + 2]; 918 s[3] = b[i2 + 3]; 919 while (nz--) { 920 idx = 4 * (*vi++); 921 xw[0] = x[idx]; 922 xw[1] = x[1 + idx]; 923 xw[2] = x[2 + idx]; 924 xw[3] = x[3 + idx]; 925 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 926 v += 16; 927 } 928 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 929 x[i2] += xw[0]; 930 x[i2 + 1] += xw[1]; 931 x[i2 + 2] += xw[2]; 932 x[i2 + 3] += xw[3]; 933 idiag += 16; 934 i2 += 4; 935 } 936 break; 937 case 5: 938 for (i = 0; i < m; i++) { 939 v = aa + 25 * ai[i]; 940 vi = aj + ai[i]; 941 nz = ai[i + 1] - ai[i]; 942 s[0] = b[i2]; 943 s[1] = b[i2 + 1]; 944 s[2] = b[i2 + 2]; 945 s[3] = b[i2 + 3]; 946 s[4] = b[i2 + 4]; 947 while (nz--) { 948 idx = 5 * (*vi++); 949 xw[0] = x[idx]; 950 xw[1] = x[1 + idx]; 951 xw[2] = x[2 + idx]; 952 xw[3] = x[3 + idx]; 953 xw[4] = x[4 + idx]; 954 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 955 v += 25; 956 } 957 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 958 x[i2] += xw[0]; 959 x[i2 + 1] += xw[1]; 960 x[i2 + 2] += xw[2]; 961 x[i2 + 3] += xw[3]; 962 x[i2 + 4] += xw[4]; 963 idiag += 25; 964 i2 += 5; 965 } 966 break; 967 case 6: 968 for (i = 0; i < m; i++) { 969 v = aa + 36 * ai[i]; 970 vi = aj + ai[i]; 971 nz = ai[i + 1] - ai[i]; 972 s[0] = b[i2]; 973 s[1] = b[i2 + 1]; 974 s[2] = b[i2 + 2]; 975 s[3] = b[i2 + 3]; 976 s[4] = b[i2 + 4]; 977 s[5] = b[i2 + 5]; 978 while (nz--) { 979 idx = 6 * (*vi++); 980 xw[0] = x[idx]; 981 xw[1] = x[1 + idx]; 982 xw[2] = x[2 + idx]; 983 xw[3] = x[3 + idx]; 984 xw[4] = x[4 + idx]; 985 xw[5] = x[5 + idx]; 986 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 987 v += 36; 988 } 989 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 990 x[i2] += xw[0]; 991 x[i2 + 1] += xw[1]; 992 x[i2 + 2] += xw[2]; 993 x[i2 + 3] += xw[3]; 994 x[i2 + 4] += xw[4]; 995 x[i2 + 5] += xw[5]; 996 idiag += 36; 997 i2 += 6; 998 } 999 break; 1000 case 7: 1001 for (i = 0; i < m; i++) { 1002 v = aa + 49 * ai[i]; 1003 vi = aj + ai[i]; 1004 nz = ai[i + 1] - ai[i]; 1005 s[0] = b[i2]; 1006 s[1] = b[i2 + 1]; 1007 s[2] = b[i2 + 2]; 1008 s[3] = b[i2 + 3]; 1009 s[4] = b[i2 + 4]; 1010 s[5] = b[i2 + 5]; 1011 s[6] = b[i2 + 6]; 1012 while (nz--) { 1013 idx = 7 * (*vi++); 1014 xw[0] = x[idx]; 1015 xw[1] = x[1 + idx]; 1016 xw[2] = x[2 + idx]; 1017 xw[3] = x[3 + idx]; 1018 xw[4] = x[4 + idx]; 1019 xw[5] = x[5 + idx]; 1020 xw[6] = x[6 + idx]; 1021 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1022 v += 49; 1023 } 1024 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1025 x[i2] += xw[0]; 1026 x[i2 + 1] += xw[1]; 1027 x[i2 + 2] += xw[2]; 1028 x[i2 + 3] += xw[3]; 1029 x[i2 + 4] += xw[4]; 1030 x[i2 + 5] += xw[5]; 1031 x[i2 + 6] += xw[6]; 1032 idiag += 49; 1033 i2 += 7; 1034 } 1035 break; 1036 default: 1037 for (i = 0; i < m; i++) { 1038 v = aa + bs2 * ai[i]; 1039 vi = aj + ai[i]; 1040 nz = ai[i + 1] - ai[i]; 1041 1042 PetscCall(PetscArraycpy(w, b + i2, bs)); 1043 /* copy all rows of x that are needed into contiguous space */ 1044 workt = work; 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1047 workt += bs; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1050 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1051 1052 idiag += bs2; 1053 i2 += bs; 1054 } 1055 break; 1056 } 1057 PetscCall(PetscLogFlops(2.0 * bs2 * a->nz)); 1058 } 1059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1060 idiag = a->idiag + bs2 * (a->mbs - 1); 1061 i2 = bs * (m - 1); 1062 switch (bs) { 1063 case 1: 1064 for (i = m - 1; i >= 0; i--) { 1065 v = aa + ai[i]; 1066 vi = aj + ai[i]; 1067 nz = ai[i + 1] - ai[i]; 1068 s[0] = b[i2]; 1069 for (j = 0; j < nz; j++) { 1070 xw[0] = x[vi[j]]; 1071 PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw); 1072 } 1073 PetscKernel_v_gets_A_times_w_1(xw, idiag, s); 1074 x[i2] += xw[0]; 1075 idiag -= 1; 1076 i2 -= 1; 1077 } 1078 break; 1079 case 2: 1080 for (i = m - 1; i >= 0; i--) { 1081 v = aa + 4 * ai[i]; 1082 vi = aj + ai[i]; 1083 nz = ai[i + 1] - ai[i]; 1084 s[0] = b[i2]; 1085 s[1] = b[i2 + 1]; 1086 for (j = 0; j < nz; j++) { 1087 idx = 2 * vi[j]; 1088 it = 4 * j; 1089 xw[0] = x[idx]; 1090 xw[1] = x[1 + idx]; 1091 PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw); 1092 } 1093 PetscKernel_v_gets_A_times_w_2(xw, idiag, s); 1094 x[i2] += xw[0]; 1095 x[i2 + 1] += xw[1]; 1096 idiag -= 4; 1097 i2 -= 2; 1098 } 1099 break; 1100 case 3: 1101 for (i = m - 1; i >= 0; i--) { 1102 v = aa + 9 * ai[i]; 1103 vi = aj + ai[i]; 1104 nz = ai[i + 1] - ai[i]; 1105 s[0] = b[i2]; 1106 s[1] = b[i2 + 1]; 1107 s[2] = b[i2 + 2]; 1108 while (nz--) { 1109 idx = 3 * (*vi++); 1110 xw[0] = x[idx]; 1111 xw[1] = x[1 + idx]; 1112 xw[2] = x[2 + idx]; 1113 PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw); 1114 v += 9; 1115 } 1116 PetscKernel_v_gets_A_times_w_3(xw, idiag, s); 1117 x[i2] += xw[0]; 1118 x[i2 + 1] += xw[1]; 1119 x[i2 + 2] += xw[2]; 1120 idiag -= 9; 1121 i2 -= 3; 1122 } 1123 break; 1124 case 4: 1125 for (i = m - 1; i >= 0; i--) { 1126 v = aa + 16 * ai[i]; 1127 vi = aj + ai[i]; 1128 nz = ai[i + 1] - ai[i]; 1129 s[0] = b[i2]; 1130 s[1] = b[i2 + 1]; 1131 s[2] = b[i2 + 2]; 1132 s[3] = b[i2 + 3]; 1133 while (nz--) { 1134 idx = 4 * (*vi++); 1135 xw[0] = x[idx]; 1136 xw[1] = x[1 + idx]; 1137 xw[2] = x[2 + idx]; 1138 xw[3] = x[3 + idx]; 1139 PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw); 1140 v += 16; 1141 } 1142 PetscKernel_v_gets_A_times_w_4(xw, idiag, s); 1143 x[i2] += xw[0]; 1144 x[i2 + 1] += xw[1]; 1145 x[i2 + 2] += xw[2]; 1146 x[i2 + 3] += xw[3]; 1147 idiag -= 16; 1148 i2 -= 4; 1149 } 1150 break; 1151 case 5: 1152 for (i = m - 1; i >= 0; i--) { 1153 v = aa + 25 * ai[i]; 1154 vi = aj + ai[i]; 1155 nz = ai[i + 1] - ai[i]; 1156 s[0] = b[i2]; 1157 s[1] = b[i2 + 1]; 1158 s[2] = b[i2 + 2]; 1159 s[3] = b[i2 + 3]; 1160 s[4] = b[i2 + 4]; 1161 while (nz--) { 1162 idx = 5 * (*vi++); 1163 xw[0] = x[idx]; 1164 xw[1] = x[1 + idx]; 1165 xw[2] = x[2 + idx]; 1166 xw[3] = x[3 + idx]; 1167 xw[4] = x[4 + idx]; 1168 PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw); 1169 v += 25; 1170 } 1171 PetscKernel_v_gets_A_times_w_5(xw, idiag, s); 1172 x[i2] += xw[0]; 1173 x[i2 + 1] += xw[1]; 1174 x[i2 + 2] += xw[2]; 1175 x[i2 + 3] += xw[3]; 1176 x[i2 + 4] += xw[4]; 1177 idiag -= 25; 1178 i2 -= 5; 1179 } 1180 break; 1181 case 6: 1182 for (i = m - 1; i >= 0; i--) { 1183 v = aa + 36 * ai[i]; 1184 vi = aj + ai[i]; 1185 nz = ai[i + 1] - ai[i]; 1186 s[0] = b[i2]; 1187 s[1] = b[i2 + 1]; 1188 s[2] = b[i2 + 2]; 1189 s[3] = b[i2 + 3]; 1190 s[4] = b[i2 + 4]; 1191 s[5] = b[i2 + 5]; 1192 while (nz--) { 1193 idx = 6 * (*vi++); 1194 xw[0] = x[idx]; 1195 xw[1] = x[1 + idx]; 1196 xw[2] = x[2 + idx]; 1197 xw[3] = x[3 + idx]; 1198 xw[4] = x[4 + idx]; 1199 xw[5] = x[5 + idx]; 1200 PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw); 1201 v += 36; 1202 } 1203 PetscKernel_v_gets_A_times_w_6(xw, idiag, s); 1204 x[i2] += xw[0]; 1205 x[i2 + 1] += xw[1]; 1206 x[i2 + 2] += xw[2]; 1207 x[i2 + 3] += xw[3]; 1208 x[i2 + 4] += xw[4]; 1209 x[i2 + 5] += xw[5]; 1210 idiag -= 36; 1211 i2 -= 6; 1212 } 1213 break; 1214 case 7: 1215 for (i = m - 1; i >= 0; i--) { 1216 v = aa + 49 * ai[i]; 1217 vi = aj + ai[i]; 1218 nz = ai[i + 1] - ai[i]; 1219 s[0] = b[i2]; 1220 s[1] = b[i2 + 1]; 1221 s[2] = b[i2 + 2]; 1222 s[3] = b[i2 + 3]; 1223 s[4] = b[i2 + 4]; 1224 s[5] = b[i2 + 5]; 1225 s[6] = b[i2 + 6]; 1226 while (nz--) { 1227 idx = 7 * (*vi++); 1228 xw[0] = x[idx]; 1229 xw[1] = x[1 + idx]; 1230 xw[2] = x[2 + idx]; 1231 xw[3] = x[3 + idx]; 1232 xw[4] = x[4 + idx]; 1233 xw[5] = x[5 + idx]; 1234 xw[6] = x[6 + idx]; 1235 PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw); 1236 v += 49; 1237 } 1238 PetscKernel_v_gets_A_times_w_7(xw, idiag, s); 1239 x[i2] += xw[0]; 1240 x[i2 + 1] += xw[1]; 1241 x[i2 + 2] += xw[2]; 1242 x[i2 + 3] += xw[3]; 1243 x[i2 + 4] += xw[4]; 1244 x[i2 + 5] += xw[5]; 1245 x[i2 + 6] += xw[6]; 1246 idiag -= 49; 1247 i2 -= 7; 1248 } 1249 break; 1250 default: 1251 for (i = m - 1; i >= 0; i--) { 1252 v = aa + bs2 * ai[i]; 1253 vi = aj + ai[i]; 1254 nz = ai[i + 1] - ai[i]; 1255 1256 PetscCall(PetscArraycpy(w, b + i2, bs)); 1257 /* copy all rows of x that are needed into contiguous space */ 1258 workt = work; 1259 for (j = 0; j < nz; j++) { 1260 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1261 workt += bs; 1262 } 1263 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work); 1264 PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2); 1265 1266 idiag -= bs2; 1267 i2 -= bs; 1268 } 1269 break; 1270 } 1271 PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz))); 1272 } 1273 } 1274 PetscCall(VecRestoreArray(xx, &x)); 1275 PetscCall(VecRestoreArrayRead(bb, &b)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 Special version for direct calls from Fortran (Used in PETSc-fun3d) 1281 */ 1282 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1283 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 1284 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1285 #define matsetvaluesblocked4_ matsetvaluesblocked4 1286 #endif 1287 1288 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[]) 1289 { 1290 Mat A = *AA; 1291 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1292 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn; 1293 PetscInt *ai = a->i, *ailen = a->ilen; 1294 PetscInt *aj = a->j, stepval, lastcol = -1; 1295 const PetscScalar *value = v; 1296 MatScalar *ap, *aa = a->a, *bap; 1297 1298 PetscFunctionBegin; 1299 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4"); 1300 stepval = (n - 1) * 4; 1301 for (k = 0; k < m; k++) { /* loop over added rows */ 1302 row = im[k]; 1303 rp = aj + ai[row]; 1304 ap = aa + 16 * ai[row]; 1305 nrow = ailen[row]; 1306 low = 0; 1307 high = nrow; 1308 for (l = 0; l < n; l++) { /* loop over added columns */ 1309 col = in[l]; 1310 if (col <= lastcol) low = 0; 1311 else high = nrow; 1312 lastcol = col; 1313 value = v + k * (stepval + 4 + l) * 4; 1314 while (high - low > 7) { 1315 t = (low + high) / 2; 1316 if (rp[t] > col) high = t; 1317 else low = t; 1318 } 1319 for (i = low; i < high; i++) { 1320 if (rp[i] > col) break; 1321 if (rp[i] == col) { 1322 bap = ap + 16 * i; 1323 for (ii = 0; ii < 4; ii++, value += stepval) { 1324 for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++; 1325 } 1326 goto noinsert2; 1327 } 1328 } 1329 N = nrow++ - 1; 1330 high++; /* added new column index thus must search to one higher than before */ 1331 /* shift up all the later entries in this row */ 1332 for (ii = N; ii >= i; ii--) { 1333 rp[ii + 1] = rp[ii]; 1334 PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16)); 1335 } 1336 if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1337 rp[i] = col; 1338 bap = ap + 16 * i; 1339 for (ii = 0; ii < 4; ii++, value += stepval) { 1340 for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++; 1341 } 1342 noinsert2:; 1343 low = i; 1344 } 1345 ailen[row] = nrow; 1346 } 1347 PetscFunctionReturnVoid(); 1348 } 1349 1350 #if defined(PETSC_HAVE_FORTRAN_CAPS) 1351 #define matsetvalues4_ MATSETVALUES4 1352 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 1353 #define matsetvalues4_ matsetvalues4 1354 #endif 1355 1356 PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v) 1357 { 1358 Mat A = *AA; 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1360 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm; 1361 PetscInt *ai = a->i, *ailen = a->ilen; 1362 PetscInt *aj = a->j, brow, bcol; 1363 PetscInt ridx, cidx, lastcol = -1; 1364 MatScalar *ap, value, *aa = a->a, *bap; 1365 1366 PetscFunctionBegin; 1367 for (k = 0; k < m; k++) { /* loop over added rows */ 1368 row = im[k]; 1369 brow = row / 4; 1370 rp = aj + ai[brow]; 1371 ap = aa + 16 * ai[brow]; 1372 nrow = ailen[brow]; 1373 low = 0; 1374 high = nrow; 1375 for (l = 0; l < n; l++) { /* loop over added columns */ 1376 col = in[l]; 1377 bcol = col / 4; 1378 ridx = row % 4; 1379 cidx = col % 4; 1380 value = v[l + k * n]; 1381 if (col <= lastcol) low = 0; 1382 else high = nrow; 1383 lastcol = col; 1384 while (high - low > 7) { 1385 t = (low + high) / 2; 1386 if (rp[t] > bcol) high = t; 1387 else low = t; 1388 } 1389 for (i = low; i < high; i++) { 1390 if (rp[i] > bcol) break; 1391 if (rp[i] == bcol) { 1392 bap = ap + 16 * i + 4 * cidx + ridx; 1393 *bap += value; 1394 goto noinsert1; 1395 } 1396 } 1397 N = nrow++ - 1; 1398 high++; /* added new column thus must search to one higher than before */ 1399 /* shift up all the later entries in this row */ 1400 PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 1401 PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1))); 1402 PetscCallVoid(PetscArrayzero(ap + 16 * i, 16)); 1403 rp[i] = bcol; 1404 ap[16 * i + 4 * cidx + ridx] = value; 1405 noinsert1:; 1406 low = i; 1407 } 1408 ailen[brow] = nrow; 1409 } 1410 PetscFunctionReturnVoid(); 1411 } 1412 1413 /* 1414 Checks for missing diagonals 1415 */ 1416 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1417 { 1418 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1419 PetscInt *diag, *ii = a->i, i; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatMarkDiagonal_SeqBAIJ(A)); 1423 *missing = PETSC_FALSE; 1424 if (A->rmap->n > 0 && !ii) { 1425 *missing = PETSC_TRUE; 1426 if (d) *d = 0; 1427 PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n")); 1428 } else { 1429 PetscInt n; 1430 n = PetscMin(a->mbs, a->nbs); 1431 diag = a->diag; 1432 for (i = 0; i < n; i++) { 1433 if (diag[i] >= ii[i + 1]) { 1434 *missing = PETSC_TRUE; 1435 if (d) *d = i; 1436 PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i)); 1437 break; 1438 } 1439 } 1440 } 1441 PetscFunctionReturn(PETSC_SUCCESS); 1442 } 1443 1444 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1445 { 1446 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1447 PetscInt i, j, m = a->mbs; 1448 1449 PetscFunctionBegin; 1450 if (!a->diag) { 1451 PetscCall(PetscMalloc1(m, &a->diag)); 1452 a->free_diag = PETSC_TRUE; 1453 } 1454 for (i = 0; i < m; i++) { 1455 a->diag[i] = a->i[i + 1]; 1456 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1457 if (a->j[j] == i) { 1458 a->diag[i] = j; 1459 break; 1460 } 1461 } 1462 } 1463 PetscFunctionReturn(PETSC_SUCCESS); 1464 } 1465 1466 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done) 1467 { 1468 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1469 PetscInt i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt; 1470 PetscInt **ia = (PetscInt **)inia, **ja = (PetscInt **)inja; 1471 1472 PetscFunctionBegin; 1473 *nn = n; 1474 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1475 if (symmetric) { 1476 PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja)); 1477 nz = tia[n]; 1478 } else { 1479 tia = a->i; 1480 tja = a->j; 1481 } 1482 1483 if (!blockcompressed && bs > 1) { 1484 (*nn) *= bs; 1485 /* malloc & create the natural set of indices */ 1486 PetscCall(PetscMalloc1((n + 1) * bs, ia)); 1487 if (n) { 1488 (*ia)[0] = oshift; 1489 for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1]; 1490 } 1491 1492 for (i = 1; i < n; i++) { 1493 (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1]; 1494 for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1]; 1495 } 1496 if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1]; 1497 1498 if (inja) { 1499 PetscCall(PetscMalloc1(nz * bs * bs, ja)); 1500 cnt = 0; 1501 for (i = 0; i < n; i++) { 1502 for (j = 0; j < bs; j++) { 1503 for (k = tia[i]; k < tia[i + 1]; k++) { 1504 for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l; 1505 } 1506 } 1507 } 1508 } 1509 1510 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1511 PetscCall(PetscFree(tia)); 1512 PetscCall(PetscFree(tja)); 1513 } 1514 } else if (oshift == 1) { 1515 if (symmetric) { 1516 nz = tia[A->rmap->n / bs]; 1517 /* add 1 to i and j indices */ 1518 for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1; 1519 *ia = tia; 1520 if (ja) { 1521 for (i = 0; i < nz; i++) tja[i] = tja[i] + 1; 1522 *ja = tja; 1523 } 1524 } else { 1525 nz = a->i[A->rmap->n / bs]; 1526 /* malloc space and add 1 to i and j indices */ 1527 PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia)); 1528 for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1; 1529 if (ja) { 1530 PetscCall(PetscMalloc1(nz, ja)); 1531 for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1; 1532 } 1533 } 1534 } else { 1535 *ia = tia; 1536 if (ja) *ja = tja; 1537 } 1538 PetscFunctionReturn(PETSC_SUCCESS); 1539 } 1540 1541 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 1542 { 1543 PetscFunctionBegin; 1544 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 1545 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1546 PetscCall(PetscFree(*ia)); 1547 if (ja) PetscCall(PetscFree(*ja)); 1548 } 1549 PetscFunctionReturn(PETSC_SUCCESS); 1550 } 1551 1552 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1553 { 1554 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1555 1556 PetscFunctionBegin; 1557 #if defined(PETSC_USE_LOG) 1558 PetscCall(PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz)); 1559 #endif 1560 PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i)); 1561 PetscCall(ISDestroy(&a->row)); 1562 PetscCall(ISDestroy(&a->col)); 1563 if (a->free_diag) PetscCall(PetscFree(a->diag)); 1564 PetscCall(PetscFree(a->idiag)); 1565 if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen)); 1566 PetscCall(PetscFree(a->solve_work)); 1567 PetscCall(PetscFree(a->mult_work)); 1568 PetscCall(PetscFree(a->sor_workt)); 1569 PetscCall(PetscFree(a->sor_work)); 1570 PetscCall(ISDestroy(&a->icol)); 1571 PetscCall(PetscFree(a->saved_values)); 1572 PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex)); 1573 1574 PetscCall(MatDestroy(&a->sbaijMat)); 1575 PetscCall(MatDestroy(&a->parent)); 1576 PetscCall(PetscFree(A->data)); 1577 1578 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 1579 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL)); 1580 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL)); 1581 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL)); 1582 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL)); 1583 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL)); 1584 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL)); 1585 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL)); 1586 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL)); 1587 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL)); 1588 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL)); 1589 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL)); 1590 #if defined(PETSC_HAVE_HYPRE) 1591 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL)); 1592 #endif 1593 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL)); 1594 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1595 PetscFunctionReturn(PETSC_SUCCESS); 1596 } 1597 1598 PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg) 1599 { 1600 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1601 1602 PetscFunctionBegin; 1603 switch (op) { 1604 case MAT_ROW_ORIENTED: 1605 a->roworiented = flg; 1606 break; 1607 case MAT_KEEP_NONZERO_PATTERN: 1608 a->keepnonzeropattern = flg; 1609 break; 1610 case MAT_NEW_NONZERO_LOCATIONS: 1611 a->nonew = (flg ? 0 : 1); 1612 break; 1613 case MAT_NEW_NONZERO_LOCATION_ERR: 1614 a->nonew = (flg ? -1 : 0); 1615 break; 1616 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1617 a->nonew = (flg ? -2 : 0); 1618 break; 1619 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1620 a->nounused = (flg ? -1 : 0); 1621 break; 1622 case MAT_FORCE_DIAGONAL_ENTRIES: 1623 case MAT_IGNORE_OFF_PROC_ENTRIES: 1624 case MAT_USE_HASH_TABLE: 1625 case MAT_SORTED_FULL: 1626 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1627 break; 1628 case MAT_SPD: 1629 case MAT_SYMMETRIC: 1630 case MAT_STRUCTURALLY_SYMMETRIC: 1631 case MAT_HERMITIAN: 1632 case MAT_SYMMETRY_ETERNAL: 1633 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1634 case MAT_SUBMAT_SINGLEIS: 1635 case MAT_STRUCTURE_ONLY: 1636 case MAT_SPD_ETERNAL: 1637 /* if the diagonal matrix is square it inherits some of the properties above */ 1638 break; 1639 default: 1640 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1641 } 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1646 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa) 1647 { 1648 PetscInt itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2; 1649 MatScalar *aa_i; 1650 PetscScalar *v_i; 1651 1652 PetscFunctionBegin; 1653 bs = A->rmap->bs; 1654 bs2 = bs * bs; 1655 PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1656 1657 bn = row / bs; /* Block number */ 1658 bp = row % bs; /* Block Position */ 1659 M = ai[bn + 1] - ai[bn]; 1660 *nz = bs * M; 1661 1662 if (v) { 1663 *v = NULL; 1664 if (*nz) { 1665 PetscCall(PetscMalloc1(*nz, v)); 1666 for (i = 0; i < M; i++) { /* for each block in the block row */ 1667 v_i = *v + i * bs; 1668 aa_i = aa + bs2 * (ai[bn] + i); 1669 for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j]; 1670 } 1671 } 1672 } 1673 1674 if (idx) { 1675 *idx = NULL; 1676 if (*nz) { 1677 PetscCall(PetscMalloc1(*nz, idx)); 1678 for (i = 0; i < M; i++) { /* for each block in the block row */ 1679 idx_i = *idx + i * bs; 1680 itmp = bs * aj[ai[bn] + i]; 1681 for (j = 0; j < bs; j++) idx_i[j] = itmp++; 1682 } 1683 } 1684 } 1685 PetscFunctionReturn(PETSC_SUCCESS); 1686 } 1687 1688 PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1689 { 1690 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1691 1692 PetscFunctionBegin; 1693 PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a)); 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1698 { 1699 PetscFunctionBegin; 1700 if (nz) *nz = 0; 1701 if (idx) PetscCall(PetscFree(*idx)); 1702 if (v) PetscCall(PetscFree(*v)); 1703 PetscFunctionReturn(PETSC_SUCCESS); 1704 } 1705 1706 PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B) 1707 { 1708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at; 1709 Mat C; 1710 PetscInt i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill; 1711 PetscInt bs2 = a->bs2, *ati, *atj, anzj, kr; 1712 MatScalar *ata, *aa = a->a; 1713 1714 PetscFunctionBegin; 1715 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1716 PetscCall(PetscCalloc1(1 + nbs, &atfill)); 1717 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1718 for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */ 1719 1720 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C)); 1721 PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N)); 1722 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 1723 PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill)); 1724 1725 at = (Mat_SeqBAIJ *)C->data; 1726 ati = at->i; 1727 for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i]; 1728 } else { 1729 C = *B; 1730 at = (Mat_SeqBAIJ *)C->data; 1731 ati = at->i; 1732 } 1733 1734 atj = at->j; 1735 ata = at->a; 1736 1737 /* Copy ati into atfill so we have locations of the next free space in atj */ 1738 PetscCall(PetscArraycpy(atfill, ati, nbs)); 1739 1740 /* Walk through A row-wise and mark nonzero entries of A^T. */ 1741 for (i = 0; i < mbs; i++) { 1742 anzj = ai[i + 1] - ai[i]; 1743 for (j = 0; j < anzj; j++) { 1744 atj[atfill[*aj]] = i; 1745 for (kr = 0; kr < bs; kr++) { 1746 for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++; 1747 } 1748 atfill[*aj++] += 1; 1749 } 1750 } 1751 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 1752 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 1753 1754 /* Clean up temporary space and complete requests. */ 1755 PetscCall(PetscFree(atfill)); 1756 1757 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1758 PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1759 *B = C; 1760 } else { 1761 PetscCall(MatHeaderMerge(A, &C)); 1762 } 1763 PetscFunctionReturn(PETSC_SUCCESS); 1764 } 1765 1766 static PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f) 1767 { 1768 Mat Btrans; 1769 1770 PetscFunctionBegin; 1771 *f = PETSC_FALSE; 1772 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans)); 1773 PetscCall(MatEqual_SeqBAIJ(B, Btrans, f)); 1774 PetscCall(MatDestroy(&Btrans)); 1775 PetscFunctionReturn(PETSC_SUCCESS); 1776 } 1777 1778 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 1779 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 1780 { 1781 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data; 1782 PetscInt header[4], M, N, m, bs, nz, cnt, i, j, k, l; 1783 PetscInt *rowlens, *colidxs; 1784 PetscScalar *matvals; 1785 1786 PetscFunctionBegin; 1787 PetscCall(PetscViewerSetUp(viewer)); 1788 1789 M = mat->rmap->N; 1790 N = mat->cmap->N; 1791 m = mat->rmap->n; 1792 bs = mat->rmap->bs; 1793 nz = bs * bs * A->nz; 1794 1795 /* write matrix header */ 1796 header[0] = MAT_FILE_CLASSID; 1797 header[1] = M; 1798 header[2] = N; 1799 header[3] = nz; 1800 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1801 1802 /* store row lengths */ 1803 PetscCall(PetscMalloc1(m, &rowlens)); 1804 for (cnt = 0, i = 0; i < A->mbs; i++) 1805 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]); 1806 PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT)); 1807 PetscCall(PetscFree(rowlens)); 1808 1809 /* store column indices */ 1810 PetscCall(PetscMalloc1(nz, &colidxs)); 1811 for (cnt = 0, i = 0; i < A->mbs; i++) 1812 for (k = 0; k < bs; k++) 1813 for (j = A->i[i]; j < A->i[i + 1]; j++) 1814 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l; 1815 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1816 PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT)); 1817 PetscCall(PetscFree(colidxs)); 1818 1819 /* store nonzero values */ 1820 PetscCall(PetscMalloc1(nz, &matvals)); 1821 for (cnt = 0, i = 0; i < A->mbs; i++) 1822 for (k = 0; k < bs; k++) 1823 for (j = A->i[i]; j < A->i[i + 1]; j++) 1824 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k]; 1825 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1826 PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR)); 1827 PetscCall(PetscFree(matvals)); 1828 1829 /* write block size option to the viewer's .info file */ 1830 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1831 PetscFunctionReturn(PETSC_SUCCESS); 1832 } 1833 1834 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer) 1835 { 1836 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1837 PetscInt i, bs = A->rmap->bs, k; 1838 1839 PetscFunctionBegin; 1840 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1841 for (i = 0; i < a->mbs; i++) { 1842 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1)); 1843 for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1)); 1844 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1845 } 1846 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1847 PetscFunctionReturn(PETSC_SUCCESS); 1848 } 1849 1850 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer) 1851 { 1852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1853 PetscInt i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2; 1854 PetscViewerFormat format; 1855 1856 PetscFunctionBegin; 1857 if (A->structure_only) { 1858 PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer)); 1859 PetscFunctionReturn(PETSC_SUCCESS); 1860 } 1861 1862 PetscCall(PetscViewerGetFormat(viewer, &format)); 1863 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1864 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1865 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1866 const char *matname; 1867 Mat aij; 1868 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij)); 1869 PetscCall(PetscObjectGetName((PetscObject)A, &matname)); 1870 PetscCall(PetscObjectSetName((PetscObject)aij, matname)); 1871 PetscCall(MatView(aij, viewer)); 1872 PetscCall(MatDestroy(&aij)); 1873 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1874 PetscFunctionReturn(PETSC_SUCCESS); 1875 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1876 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1877 for (i = 0; i < a->mbs; i++) { 1878 for (j = 0; j < bs; j++) { 1879 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1880 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1881 for (l = 0; l < bs; l++) { 1882 #if defined(PETSC_USE_COMPLEX) 1883 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1884 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1885 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1886 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1887 } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) { 1888 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1889 } 1890 #else 1891 if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1892 #endif 1893 } 1894 } 1895 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1896 } 1897 } 1898 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1899 } else { 1900 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE)); 1901 for (i = 0; i < a->mbs; i++) { 1902 for (j = 0; j < bs; j++) { 1903 PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j)); 1904 for (k = a->i[i]; k < a->i[i + 1]; k++) { 1905 for (l = 0; l < bs; l++) { 1906 #if defined(PETSC_USE_COMPLEX) 1907 if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) { 1908 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1909 } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) { 1910 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j]))); 1911 } else { 1912 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]))); 1913 } 1914 #else 1915 PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j])); 1916 #endif 1917 } 1918 } 1919 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 1920 } 1921 } 1922 PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE)); 1923 } 1924 PetscCall(PetscViewerFlush(viewer)); 1925 PetscFunctionReturn(PETSC_SUCCESS); 1926 } 1927 1928 #include <petscdraw.h> 1929 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa) 1930 { 1931 Mat A = (Mat)Aa; 1932 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 1933 PetscInt row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2; 1934 PetscReal xl, yl, xr, yr, x_l, x_r, y_l, y_r; 1935 MatScalar *aa; 1936 PetscViewer viewer; 1937 PetscViewerFormat format; 1938 1939 PetscFunctionBegin; 1940 PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer)); 1941 PetscCall(PetscViewerGetFormat(viewer, &format)); 1942 PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr)); 1943 1944 /* loop over matrix elements drawing boxes */ 1945 1946 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1947 PetscDrawCollectiveBegin(draw); 1948 /* Blue for negative, Cyan for zero and Red for positive */ 1949 color = PETSC_DRAW_BLUE; 1950 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1951 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1952 y_l = A->rmap->N - row - 1.0; 1953 y_r = y_l + 1.0; 1954 x_l = a->j[j] * bs; 1955 x_r = x_l + 1.0; 1956 aa = a->a + j * bs2; 1957 for (k = 0; k < bs; k++) { 1958 for (l = 0; l < bs; l++) { 1959 if (PetscRealPart(*aa++) >= 0.) continue; 1960 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1961 } 1962 } 1963 } 1964 } 1965 color = PETSC_DRAW_CYAN; 1966 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1967 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1968 y_l = A->rmap->N - row - 1.0; 1969 y_r = y_l + 1.0; 1970 x_l = a->j[j] * bs; 1971 x_r = x_l + 1.0; 1972 aa = a->a + j * bs2; 1973 for (k = 0; k < bs; k++) { 1974 for (l = 0; l < bs; l++) { 1975 if (PetscRealPart(*aa++) != 0.) continue; 1976 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1977 } 1978 } 1979 } 1980 } 1981 color = PETSC_DRAW_RED; 1982 for (i = 0, row = 0; i < mbs; i++, row += bs) { 1983 for (j = a->i[i]; j < a->i[i + 1]; j++) { 1984 y_l = A->rmap->N - row - 1.0; 1985 y_r = y_l + 1.0; 1986 x_l = a->j[j] * bs; 1987 x_r = x_l + 1.0; 1988 aa = a->a + j * bs2; 1989 for (k = 0; k < bs; k++) { 1990 for (l = 0; l < bs; l++) { 1991 if (PetscRealPart(*aa++) <= 0.) continue; 1992 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 1993 } 1994 } 1995 } 1996 } 1997 PetscDrawCollectiveEnd(draw); 1998 } else { 1999 /* use contour shading to indicate magnitude of values */ 2000 /* first determine max of all nonzero values */ 2001 PetscReal minv = 0.0, maxv = 0.0; 2002 PetscDraw popup; 2003 2004 for (i = 0; i < a->nz * a->bs2; i++) { 2005 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 2006 } 2007 if (minv >= maxv) maxv = minv + PETSC_SMALL; 2008 PetscCall(PetscDrawGetPopup(draw, &popup)); 2009 PetscCall(PetscDrawScalePopup(popup, 0.0, maxv)); 2010 2011 PetscDrawCollectiveBegin(draw); 2012 for (i = 0, row = 0; i < mbs; i++, row += bs) { 2013 for (j = a->i[i]; j < a->i[i + 1]; j++) { 2014 y_l = A->rmap->N - row - 1.0; 2015 y_r = y_l + 1.0; 2016 x_l = a->j[j] * bs; 2017 x_r = x_l + 1.0; 2018 aa = a->a + j * bs2; 2019 for (k = 0; k < bs; k++) { 2020 for (l = 0; l < bs; l++) { 2021 MatScalar v = *aa++; 2022 color = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv); 2023 PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color)); 2024 } 2025 } 2026 } 2027 } 2028 PetscDrawCollectiveEnd(draw); 2029 } 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer) 2034 { 2035 PetscReal xl, yl, xr, yr, w, h; 2036 PetscDraw draw; 2037 PetscBool isnull; 2038 2039 PetscFunctionBegin; 2040 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 2041 PetscCall(PetscDrawIsNull(draw, &isnull)); 2042 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 2043 2044 xr = A->cmap->n; 2045 yr = A->rmap->N; 2046 h = yr / 10.0; 2047 w = xr / 10.0; 2048 xr += w; 2049 yr += h; 2050 xl = -w; 2051 yl = -h; 2052 PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr)); 2053 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer)); 2054 PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A)); 2055 PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL)); 2056 PetscCall(PetscDrawSave(draw)); 2057 PetscFunctionReturn(PETSC_SUCCESS); 2058 } 2059 2060 PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer) 2061 { 2062 PetscBool iascii, isbinary, isdraw; 2063 2064 PetscFunctionBegin; 2065 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 2066 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2067 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 2068 if (iascii) { 2069 PetscCall(MatView_SeqBAIJ_ASCII(A, viewer)); 2070 } else if (isbinary) { 2071 PetscCall(MatView_SeqBAIJ_Binary(A, viewer)); 2072 } else if (isdraw) { 2073 PetscCall(MatView_SeqBAIJ_Draw(A, viewer)); 2074 } else { 2075 Mat B; 2076 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 2077 PetscCall(MatView(B, viewer)); 2078 PetscCall(MatDestroy(&B)); 2079 } 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[]) 2084 { 2085 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2086 PetscInt *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 2087 PetscInt *ai = a->i, *ailen = a->ilen; 2088 PetscInt brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2; 2089 MatScalar *ap, *aa = a->a; 2090 2091 PetscFunctionBegin; 2092 for (k = 0; k < m; k++) { /* loop over rows */ 2093 row = im[k]; 2094 brow = row / bs; 2095 if (row < 0) { 2096 v += n; 2097 continue; 2098 } /* negative row */ 2099 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row); 2100 rp = aj ? aj + ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2101 ap = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */ 2102 nrow = ailen[brow]; 2103 for (l = 0; l < n; l++) { /* loop over columns */ 2104 if (in[l] < 0) { 2105 v++; 2106 continue; 2107 } /* negative column */ 2108 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]); 2109 col = in[l]; 2110 bcol = col / bs; 2111 cidx = col % bs; 2112 ridx = row % bs; 2113 high = nrow; 2114 low = 0; /* assume unsorted */ 2115 while (high - low > 5) { 2116 t = (low + high) / 2; 2117 if (rp[t] > bcol) high = t; 2118 else low = t; 2119 } 2120 for (i = low; i < high; i++) { 2121 if (rp[i] > bcol) break; 2122 if (rp[i] == bcol) { 2123 *v++ = ap[bs2 * i + bs * cidx + ridx]; 2124 goto finished; 2125 } 2126 } 2127 *v++ = 0.0; 2128 finished:; 2129 } 2130 } 2131 PetscFunctionReturn(PETSC_SUCCESS); 2132 } 2133 2134 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2135 { 2136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2137 PetscInt *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1; 2138 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2139 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval; 2140 PetscBool roworiented = a->roworiented; 2141 const PetscScalar *value = v; 2142 MatScalar *ap = NULL, *aa = a->a, *bap; 2143 2144 PetscFunctionBegin; 2145 if (roworiented) { 2146 stepval = (n - 1) * bs; 2147 } else { 2148 stepval = (m - 1) * bs; 2149 } 2150 for (k = 0; k < m; k++) { /* loop over added rows */ 2151 row = im[k]; 2152 if (row < 0) continue; 2153 PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1); 2154 rp = aj + ai[row]; 2155 if (!A->structure_only) ap = aa + bs2 * ai[row]; 2156 rmax = imax[row]; 2157 nrow = ailen[row]; 2158 low = 0; 2159 high = nrow; 2160 for (l = 0; l < n; l++) { /* loop over added columns */ 2161 if (in[l] < 0) continue; 2162 PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1); 2163 col = in[l]; 2164 if (!A->structure_only) { 2165 if (roworiented) { 2166 value = v + (k * (stepval + bs) + l) * bs; 2167 } else { 2168 value = v + (l * (stepval + bs) + k) * bs; 2169 } 2170 } 2171 if (col <= lastcol) low = 0; 2172 else high = nrow; 2173 lastcol = col; 2174 while (high - low > 7) { 2175 t = (low + high) / 2; 2176 if (rp[t] > col) high = t; 2177 else low = t; 2178 } 2179 for (i = low; i < high; i++) { 2180 if (rp[i] > col) break; 2181 if (rp[i] == col) { 2182 if (A->structure_only) goto noinsert2; 2183 bap = ap + bs2 * i; 2184 if (roworiented) { 2185 if (is == ADD_VALUES) { 2186 for (ii = 0; ii < bs; ii++, value += stepval) { 2187 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 2188 } 2189 } else { 2190 for (ii = 0; ii < bs; ii++, value += stepval) { 2191 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2192 } 2193 } 2194 } else { 2195 if (is == ADD_VALUES) { 2196 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2197 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 2198 bap += bs; 2199 } 2200 } else { 2201 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 2202 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 2203 bap += bs; 2204 } 2205 } 2206 } 2207 goto noinsert2; 2208 } 2209 } 2210 if (nonew == 1) goto noinsert2; 2211 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2212 if (A->structure_only) { 2213 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar); 2214 } else { 2215 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2216 } 2217 N = nrow++ - 1; 2218 high++; 2219 /* shift up all the later entries in this row */ 2220 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2221 rp[i] = col; 2222 if (!A->structure_only) { 2223 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2224 bap = ap + bs2 * i; 2225 if (roworiented) { 2226 for (ii = 0; ii < bs; ii++, value += stepval) { 2227 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 2228 } 2229 } else { 2230 for (ii = 0; ii < bs; ii++, value += stepval) { 2231 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 2232 } 2233 } 2234 } 2235 noinsert2:; 2236 low = i; 2237 } 2238 ailen[row] = nrow; 2239 } 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } 2242 2243 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode) 2244 { 2245 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2246 PetscInt fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax; 2247 PetscInt m = A->rmap->N, *ip, N, *ailen = a->ilen; 2248 PetscInt mbs = a->mbs, bs2 = a->bs2, rmax = 0; 2249 MatScalar *aa = a->a, *ap; 2250 PetscReal ratio = 0.6; 2251 2252 PetscFunctionBegin; 2253 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS); 2254 2255 if (m) rmax = ailen[0]; 2256 for (i = 1; i < mbs; i++) { 2257 /* move each row back by the amount of empty slots (fshift) before it*/ 2258 fshift += imax[i - 1] - ailen[i - 1]; 2259 rmax = PetscMax(rmax, ailen[i]); 2260 if (fshift) { 2261 ip = aj + ai[i]; 2262 ap = aa + bs2 * ai[i]; 2263 N = ailen[i]; 2264 PetscCall(PetscArraymove(ip - fshift, ip, N)); 2265 if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N)); 2266 } 2267 ai[i] = ai[i - 1] + ailen[i - 1]; 2268 } 2269 if (mbs) { 2270 fshift += imax[mbs - 1] - ailen[mbs - 1]; 2271 ai[mbs] = ai[mbs - 1] + ailen[mbs - 1]; 2272 } 2273 2274 /* reset ilen and imax for each row */ 2275 a->nonzerorowcnt = 0; 2276 if (A->structure_only) { 2277 PetscCall(PetscFree2(a->imax, a->ilen)); 2278 } else { /* !A->structure_only */ 2279 for (i = 0; i < mbs; i++) { 2280 ailen[i] = imax[i] = ai[i + 1] - ai[i]; 2281 a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0); 2282 } 2283 } 2284 a->nz = ai[mbs]; 2285 2286 /* diagonals may have moved, so kill the diagonal pointers */ 2287 a->idiagvalid = PETSC_FALSE; 2288 if (fshift && a->diag) PetscCall(PetscFree(a->diag)); 2289 if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2); 2290 PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2)); 2291 PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs)); 2292 PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax)); 2293 2294 A->info.mallocs += a->reallocs; 2295 a->reallocs = 0; 2296 A->info.nz_unneeded = (PetscReal)fshift * bs2; 2297 a->rmax = rmax; 2298 2299 if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio)); 2300 PetscFunctionReturn(PETSC_SUCCESS); 2301 } 2302 2303 /* 2304 This function returns an array of flags which indicate the locations of contiguous 2305 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 2306 then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)] 2307 Assume: sizes should be long enough to hold all the values. 2308 */ 2309 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max) 2310 { 2311 PetscInt j = 0; 2312 2313 PetscFunctionBegin; 2314 for (PetscInt i = 0; i < n; j++) { 2315 PetscInt row = idx[i]; 2316 if (row % bs != 0) { /* Not the beginning of a block */ 2317 sizes[j] = 1; 2318 i++; 2319 } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */ 2320 sizes[j] = 1; /* Also makes sure at least 'bs' values exist for next else */ 2321 i++; 2322 } else { /* Beginning of the block, so check if the complete block exists */ 2323 PetscBool flg = PETSC_TRUE; 2324 for (PetscInt k = 1; k < bs; k++) { 2325 if (row + k != idx[i + k]) { /* break in the block */ 2326 flg = PETSC_FALSE; 2327 break; 2328 } 2329 } 2330 if (flg) { /* No break in the bs */ 2331 sizes[j] = bs; 2332 i += bs; 2333 } else { 2334 sizes[j] = 1; 2335 i++; 2336 } 2337 } 2338 } 2339 *bs_max = j; 2340 PetscFunctionReturn(PETSC_SUCCESS); 2341 } 2342 2343 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2344 { 2345 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2346 PetscInt i, j, k, count, *rows; 2347 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max; 2348 PetscScalar zero = 0.0; 2349 MatScalar *aa; 2350 const PetscScalar *xx; 2351 PetscScalar *bb; 2352 2353 PetscFunctionBegin; 2354 /* fix right hand side if needed */ 2355 if (x && b) { 2356 PetscCall(VecGetArrayRead(x, &xx)); 2357 PetscCall(VecGetArray(b, &bb)); 2358 for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]]; 2359 PetscCall(VecRestoreArrayRead(x, &xx)); 2360 PetscCall(VecRestoreArray(b, &bb)); 2361 } 2362 2363 /* Make a copy of the IS and sort it */ 2364 /* allocate memory for rows,sizes */ 2365 PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes)); 2366 2367 /* copy IS values to rows, and sort them */ 2368 for (i = 0; i < is_n; i++) rows[i] = is_idx[i]; 2369 PetscCall(PetscSortInt(is_n, rows)); 2370 2371 if (baij->keepnonzeropattern) { 2372 for (i = 0; i < is_n; i++) sizes[i] = 1; 2373 bs_max = is_n; 2374 } else { 2375 PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max)); 2376 A->nonzerostate++; 2377 } 2378 2379 for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) { 2380 row = rows[j]; 2381 PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row); 2382 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2383 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2384 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2385 if (diag != (PetscScalar)0.0) { 2386 if (baij->ilen[row / bs] > 0) { 2387 baij->ilen[row / bs] = 1; 2388 baij->j[baij->i[row / bs]] = row / bs; 2389 2390 PetscCall(PetscArrayzero(aa, count * bs)); 2391 } 2392 /* Now insert all the diagonal values for this bs */ 2393 for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES)); 2394 } else { /* (diag == 0.0) */ 2395 baij->ilen[row / bs] = 0; 2396 } /* end (diag == 0.0) */ 2397 } else { /* (sizes[i] != bs) */ 2398 PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1"); 2399 for (k = 0; k < count; k++) { 2400 aa[0] = zero; 2401 aa += bs; 2402 } 2403 if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES)); 2404 } 2405 } 2406 2407 PetscCall(PetscFree2(rows, sizes)); 2408 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b) 2413 { 2414 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)A->data; 2415 PetscInt i, j, k, count; 2416 PetscInt bs = A->rmap->bs, bs2 = baij->bs2, row, col; 2417 PetscScalar zero = 0.0; 2418 MatScalar *aa; 2419 const PetscScalar *xx; 2420 PetscScalar *bb; 2421 PetscBool *zeroed, vecs = PETSC_FALSE; 2422 2423 PetscFunctionBegin; 2424 /* fix right hand side if needed */ 2425 if (x && b) { 2426 PetscCall(VecGetArrayRead(x, &xx)); 2427 PetscCall(VecGetArray(b, &bb)); 2428 vecs = PETSC_TRUE; 2429 } 2430 2431 /* zero the columns */ 2432 PetscCall(PetscCalloc1(A->rmap->n, &zeroed)); 2433 for (i = 0; i < is_n; i++) { 2434 PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]); 2435 zeroed[is_idx[i]] = PETSC_TRUE; 2436 } 2437 for (i = 0; i < A->rmap->N; i++) { 2438 if (!zeroed[i]) { 2439 row = i / bs; 2440 for (j = baij->i[row]; j < baij->i[row + 1]; j++) { 2441 for (k = 0; k < bs; k++) { 2442 col = bs * baij->j[j] + k; 2443 if (zeroed[col]) { 2444 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 2445 if (vecs) bb[i] -= aa[0] * xx[col]; 2446 aa[0] = 0.0; 2447 } 2448 } 2449 } 2450 } else if (vecs) bb[i] = diag * xx[i]; 2451 } 2452 PetscCall(PetscFree(zeroed)); 2453 if (vecs) { 2454 PetscCall(VecRestoreArrayRead(x, &xx)); 2455 PetscCall(VecRestoreArray(b, &bb)); 2456 } 2457 2458 /* zero the rows */ 2459 for (i = 0; i < is_n; i++) { 2460 row = is_idx[i]; 2461 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 2462 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 2463 for (k = 0; k < count; k++) { 2464 aa[0] = zero; 2465 aa += bs; 2466 } 2467 if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES); 2468 } 2469 PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY)); 2470 PetscFunctionReturn(PETSC_SUCCESS); 2471 } 2472 2473 PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is) 2474 { 2475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2476 PetscInt *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1; 2477 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 2478 PetscInt *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol; 2479 PetscInt ridx, cidx, bs2 = a->bs2; 2480 PetscBool roworiented = a->roworiented; 2481 MatScalar *ap = NULL, value = 0.0, *aa = a->a, *bap; 2482 2483 PetscFunctionBegin; 2484 for (k = 0; k < m; k++) { /* loop over added rows */ 2485 row = im[k]; 2486 brow = row / bs; 2487 if (row < 0) continue; 2488 PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1); 2489 rp = aj + ai[brow]; 2490 if (!A->structure_only) ap = aa + bs2 * ai[brow]; 2491 rmax = imax[brow]; 2492 nrow = ailen[brow]; 2493 low = 0; 2494 high = nrow; 2495 for (l = 0; l < n; l++) { /* loop over added columns */ 2496 if (in[l] < 0) continue; 2497 PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1); 2498 col = in[l]; 2499 bcol = col / bs; 2500 ridx = row % bs; 2501 cidx = col % bs; 2502 if (!A->structure_only) { 2503 if (roworiented) { 2504 value = v[l + k * n]; 2505 } else { 2506 value = v[k + l * m]; 2507 } 2508 } 2509 if (col <= lastcol) low = 0; 2510 else high = nrow; 2511 lastcol = col; 2512 while (high - low > 7) { 2513 t = (low + high) / 2; 2514 if (rp[t] > bcol) high = t; 2515 else low = t; 2516 } 2517 for (i = low; i < high; i++) { 2518 if (rp[i] > bcol) break; 2519 if (rp[i] == bcol) { 2520 bap = ap + bs2 * i + bs * cidx + ridx; 2521 if (!A->structure_only) { 2522 if (is == ADD_VALUES) *bap += value; 2523 else *bap = value; 2524 } 2525 goto noinsert1; 2526 } 2527 } 2528 if (nonew == 1) goto noinsert1; 2529 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col); 2530 if (A->structure_only) { 2531 MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar); 2532 } else { 2533 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 2534 } 2535 N = nrow++ - 1; 2536 high++; 2537 /* shift up all the later entries in this row */ 2538 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 2539 rp[i] = bcol; 2540 if (!A->structure_only) { 2541 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 2542 PetscCall(PetscArrayzero(ap + bs2 * i, bs2)); 2543 ap[bs2 * i + bs * cidx + ridx] = value; 2544 } 2545 a->nz++; 2546 A->nonzerostate++; 2547 noinsert1:; 2548 low = i; 2549 } 2550 ailen[brow] = nrow; 2551 } 2552 PetscFunctionReturn(PETSC_SUCCESS); 2553 } 2554 2555 static PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info) 2556 { 2557 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data; 2558 Mat outA; 2559 PetscBool row_identity, col_identity; 2560 2561 PetscFunctionBegin; 2562 PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU"); 2563 PetscCall(ISIdentity(row, &row_identity)); 2564 PetscCall(ISIdentity(col, &col_identity)); 2565 PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU"); 2566 2567 outA = inA; 2568 inA->factortype = MAT_FACTOR_LU; 2569 PetscCall(PetscFree(inA->solvertype)); 2570 PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype)); 2571 2572 PetscCall(MatMarkDiagonal_SeqBAIJ(inA)); 2573 2574 PetscCall(PetscObjectReference((PetscObject)row)); 2575 PetscCall(ISDestroy(&a->row)); 2576 a->row = row; 2577 PetscCall(PetscObjectReference((PetscObject)col)); 2578 PetscCall(ISDestroy(&a->col)); 2579 a->col = col; 2580 2581 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2582 PetscCall(ISDestroy(&a->icol)); 2583 PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol)); 2584 2585 PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity))); 2586 if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); } 2587 PetscCall(MatLUFactorNumeric(outA, inA, info)); 2588 PetscFunctionReturn(PETSC_SUCCESS); 2589 } 2590 2591 static PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, const PetscInt *indices) 2592 { 2593 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 2594 2595 PetscFunctionBegin; 2596 baij->nz = baij->maxnz; 2597 PetscCall(PetscArraycpy(baij->j, indices, baij->nz)); 2598 PetscCall(PetscArraycpy(baij->ilen, baij->imax, baij->mbs)); 2599 PetscFunctionReturn(PETSC_SUCCESS); 2600 } 2601 2602 /*@ 2603 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix. 2604 2605 Input Parameters: 2606 + mat - the `MATSEQBAIJ` matrix 2607 - indices - the column indices 2608 2609 Level: advanced 2610 2611 Notes: 2612 This can be called if you have precomputed the nonzero structure of the 2613 matrix and want to provide it to the matrix object to improve the performance 2614 of the `MatSetValues()` operation. 2615 2616 You MUST have set the correct numbers of nonzeros per row in the call to 2617 `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted. 2618 2619 MUST be called before any calls to `MatSetValues()` 2620 2621 .seealso: `MATSEQBAIJ`, `MatSetValues()` 2622 @*/ 2623 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices) 2624 { 2625 PetscFunctionBegin; 2626 PetscValidHeaderSpecific(mat, MAT_CLASSID, 1); 2627 PetscValidIntPointer(indices, 2); 2628 PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices)); 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[]) 2633 { 2634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2635 PetscInt i, j, n, row, bs, *ai, *aj, mbs; 2636 PetscReal atmp; 2637 PetscScalar *x, zero = 0.0; 2638 MatScalar *aa; 2639 PetscInt ncols, brow, krow, kcol; 2640 2641 PetscFunctionBegin; 2642 /* why is this not a macro???????????????????????????????????????????????????????????????? */ 2643 PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 2644 bs = A->rmap->bs; 2645 aa = a->a; 2646 ai = a->i; 2647 aj = a->j; 2648 mbs = a->mbs; 2649 2650 PetscCall(VecSet(v, zero)); 2651 PetscCall(VecGetArray(v, &x)); 2652 PetscCall(VecGetLocalSize(v, &n)); 2653 PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector"); 2654 for (i = 0; i < mbs; i++) { 2655 ncols = ai[1] - ai[0]; 2656 ai++; 2657 brow = bs * i; 2658 for (j = 0; j < ncols; j++) { 2659 for (kcol = 0; kcol < bs; kcol++) { 2660 for (krow = 0; krow < bs; krow++) { 2661 atmp = PetscAbsScalar(*aa); 2662 aa++; 2663 row = brow + krow; /* row index */ 2664 if (PetscAbsScalar(x[row]) < atmp) { 2665 x[row] = atmp; 2666 if (idx) idx[row] = bs * (*aj) + kcol; 2667 } 2668 } 2669 } 2670 aj++; 2671 } 2672 } 2673 PetscCall(VecRestoreArray(v, &x)); 2674 PetscFunctionReturn(PETSC_SUCCESS); 2675 } 2676 2677 PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str) 2678 { 2679 PetscFunctionBegin; 2680 /* If the two matrices have the same copy implementation, use fast copy. */ 2681 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2682 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2683 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 2684 PetscInt ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs; 2685 2686 PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]); 2687 PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs); 2688 PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs])); 2689 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2690 } else { 2691 PetscCall(MatCopy_Basic(A, B, str)); 2692 } 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2697 { 2698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2699 2700 PetscFunctionBegin; 2701 *array = a->a; 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[]) 2706 { 2707 PetscFunctionBegin; 2708 *array = NULL; 2709 PetscFunctionReturn(PETSC_SUCCESS); 2710 } 2711 2712 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz) 2713 { 2714 PetscInt bs = Y->rmap->bs, mbs = Y->rmap->N / bs; 2715 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 2716 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 2717 2718 PetscFunctionBegin; 2719 /* Set the number of nonzeros in the new matrix */ 2720 PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz)); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2725 { 2726 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data; 2727 PetscInt bs = Y->rmap->bs, bs2 = bs * bs; 2728 PetscBLASInt one = 1; 2729 2730 PetscFunctionBegin; 2731 if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) { 2732 PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE; 2733 if (e) { 2734 PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e)); 2735 if (e) { 2736 PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e)); 2737 if (e) str = SAME_NONZERO_PATTERN; 2738 } 2739 } 2740 if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN"); 2741 } 2742 if (str == SAME_NONZERO_PATTERN) { 2743 PetscScalar alpha = a; 2744 PetscBLASInt bnz; 2745 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 2746 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 2747 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 2748 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2749 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2750 } else { 2751 Mat B; 2752 PetscInt *nnz; 2753 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 2754 PetscCall(PetscMalloc1(Y->rmap->N, &nnz)); 2755 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2756 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2757 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 2758 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 2759 PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name)); 2760 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz)); 2761 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 2762 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2763 PetscCall(MatHeaderMerge(Y, &B)); 2764 PetscCall(PetscFree(nnz)); 2765 } 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 } 2768 2769 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A) 2770 { 2771 #if PetscDefined(USE_COMPLEX) 2772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2773 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2774 MatScalar *aa = a->a; 2775 2776 PetscFunctionBegin; 2777 for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]); 2778 PetscFunctionReturn(PETSC_SUCCESS); 2779 #else 2780 (void)A; 2781 return PETSC_SUCCESS; 2782 #endif 2783 } 2784 2785 static PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2786 { 2787 #if PetscDefined(USE_COMPLEX) 2788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2789 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2790 MatScalar *aa = a->a; 2791 2792 PetscFunctionBegin; 2793 for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]); 2794 PetscFunctionReturn(PETSC_SUCCESS); 2795 #else 2796 (void)A; 2797 return PETSC_SUCCESS; 2798 #endif 2799 } 2800 2801 static PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2802 { 2803 #if PetscDefined(USE_COMPLEX) 2804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2805 PetscInt i, nz = a->bs2 * a->i[a->mbs]; 2806 MatScalar *aa = a->a; 2807 2808 PetscFunctionBegin; 2809 for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2810 PetscFunctionReturn(PETSC_SUCCESS); 2811 #else 2812 (void)A; 2813 return PETSC_SUCCESS; 2814 #endif 2815 } 2816 2817 /* 2818 Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code 2819 */ 2820 static PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2821 { 2822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2823 PetscInt bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs; 2824 PetscInt nz = a->i[m], row, *jj, mr, col; 2825 2826 PetscFunctionBegin; 2827 *nn = n; 2828 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2829 PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices"); 2830 PetscCall(PetscCalloc1(n, &collengths)); 2831 PetscCall(PetscMalloc1(n + 1, &cia)); 2832 PetscCall(PetscMalloc1(nz, &cja)); 2833 jj = a->j; 2834 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2835 cia[0] = oshift; 2836 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2837 PetscCall(PetscArrayzero(collengths, n)); 2838 jj = a->j; 2839 for (row = 0; row < m; row++) { 2840 mr = a->i[row + 1] - a->i[row]; 2841 for (i = 0; i < mr; i++) { 2842 col = *jj++; 2843 2844 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2845 } 2846 } 2847 PetscCall(PetscFree(collengths)); 2848 *ia = cia; 2849 *ja = cja; 2850 PetscFunctionReturn(PETSC_SUCCESS); 2851 } 2852 2853 static PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 2854 { 2855 PetscFunctionBegin; 2856 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2857 PetscCall(PetscFree(*ia)); 2858 PetscCall(PetscFree(*ja)); 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 /* 2863 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2864 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2865 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2866 */ 2867 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2868 { 2869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 2870 PetscInt i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs; 2871 PetscInt nz = a->i[m], row, *jj, mr, col; 2872 PetscInt *cspidx; 2873 2874 PetscFunctionBegin; 2875 *nn = n; 2876 if (!ia) PetscFunctionReturn(PETSC_SUCCESS); 2877 2878 PetscCall(PetscCalloc1(n, &collengths)); 2879 PetscCall(PetscMalloc1(n + 1, &cia)); 2880 PetscCall(PetscMalloc1(nz, &cja)); 2881 PetscCall(PetscMalloc1(nz, &cspidx)); 2882 jj = a->j; 2883 for (i = 0; i < nz; i++) collengths[jj[i]]++; 2884 cia[0] = oshift; 2885 for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i]; 2886 PetscCall(PetscArrayzero(collengths, n)); 2887 jj = a->j; 2888 for (row = 0; row < m; row++) { 2889 mr = a->i[row + 1] - a->i[row]; 2890 for (i = 0; i < mr; i++) { 2891 col = *jj++; 2892 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2893 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2894 } 2895 } 2896 PetscCall(PetscFree(collengths)); 2897 *ia = cia; 2898 *ja = cja; 2899 *spidx = cspidx; 2900 PetscFunctionReturn(PETSC_SUCCESS); 2901 } 2902 2903 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done) 2904 { 2905 PetscFunctionBegin; 2906 PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done)); 2907 PetscCall(PetscFree(*spidx)); 2908 PetscFunctionReturn(PETSC_SUCCESS); 2909 } 2910 2911 PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a) 2912 { 2913 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data; 2914 2915 PetscFunctionBegin; 2916 if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL)); 2917 PetscCall(MatShift_Basic(Y, a)); 2918 PetscFunctionReturn(PETSC_SUCCESS); 2919 } 2920 2921 /* -------------------------------------------------------------------*/ 2922 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2923 MatGetRow_SeqBAIJ, 2924 MatRestoreRow_SeqBAIJ, 2925 MatMult_SeqBAIJ_N, 2926 /* 4*/ MatMultAdd_SeqBAIJ_N, 2927 MatMultTranspose_SeqBAIJ, 2928 MatMultTransposeAdd_SeqBAIJ, 2929 NULL, 2930 NULL, 2931 NULL, 2932 /* 10*/ NULL, 2933 MatLUFactor_SeqBAIJ, 2934 NULL, 2935 NULL, 2936 MatTranspose_SeqBAIJ, 2937 /* 15*/ MatGetInfo_SeqBAIJ, 2938 MatEqual_SeqBAIJ, 2939 MatGetDiagonal_SeqBAIJ, 2940 MatDiagonalScale_SeqBAIJ, 2941 MatNorm_SeqBAIJ, 2942 /* 20*/ NULL, 2943 MatAssemblyEnd_SeqBAIJ, 2944 MatSetOption_SeqBAIJ, 2945 MatZeroEntries_SeqBAIJ, 2946 /* 24*/ MatZeroRows_SeqBAIJ, 2947 NULL, 2948 NULL, 2949 NULL, 2950 NULL, 2951 /* 29*/ MatSetUp_Seq_Hash, 2952 NULL, 2953 NULL, 2954 NULL, 2955 NULL, 2956 /* 34*/ MatDuplicate_SeqBAIJ, 2957 NULL, 2958 NULL, 2959 MatILUFactor_SeqBAIJ, 2960 NULL, 2961 /* 39*/ MatAXPY_SeqBAIJ, 2962 MatCreateSubMatrices_SeqBAIJ, 2963 MatIncreaseOverlap_SeqBAIJ, 2964 MatGetValues_SeqBAIJ, 2965 MatCopy_SeqBAIJ, 2966 /* 44*/ NULL, 2967 MatScale_SeqBAIJ, 2968 MatShift_SeqBAIJ, 2969 NULL, 2970 MatZeroRowsColumns_SeqBAIJ, 2971 /* 49*/ NULL, 2972 MatGetRowIJ_SeqBAIJ, 2973 MatRestoreRowIJ_SeqBAIJ, 2974 MatGetColumnIJ_SeqBAIJ, 2975 MatRestoreColumnIJ_SeqBAIJ, 2976 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2977 NULL, 2978 NULL, 2979 NULL, 2980 MatSetValuesBlocked_SeqBAIJ, 2981 /* 59*/ MatCreateSubMatrix_SeqBAIJ, 2982 MatDestroy_SeqBAIJ, 2983 MatView_SeqBAIJ, 2984 NULL, 2985 NULL, 2986 /* 64*/ NULL, 2987 NULL, 2988 NULL, 2989 NULL, 2990 NULL, 2991 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2992 NULL, 2993 MatConvert_Basic, 2994 NULL, 2995 NULL, 2996 /* 74*/ NULL, 2997 MatFDColoringApply_BAIJ, 2998 NULL, 2999 NULL, 3000 NULL, 3001 /* 79*/ NULL, 3002 NULL, 3003 NULL, 3004 NULL, 3005 MatLoad_SeqBAIJ, 3006 /* 84*/ NULL, 3007 NULL, 3008 NULL, 3009 NULL, 3010 NULL, 3011 /* 89*/ NULL, 3012 NULL, 3013 NULL, 3014 NULL, 3015 NULL, 3016 /* 94*/ NULL, 3017 NULL, 3018 NULL, 3019 NULL, 3020 NULL, 3021 /* 99*/ NULL, 3022 NULL, 3023 NULL, 3024 MatConjugate_SeqBAIJ, 3025 NULL, 3026 /*104*/ NULL, 3027 MatRealPart_SeqBAIJ, 3028 MatImaginaryPart_SeqBAIJ, 3029 NULL, 3030 NULL, 3031 /*109*/ NULL, 3032 NULL, 3033 NULL, 3034 NULL, 3035 MatMissingDiagonal_SeqBAIJ, 3036 /*114*/ NULL, 3037 NULL, 3038 NULL, 3039 NULL, 3040 NULL, 3041 /*119*/ NULL, 3042 NULL, 3043 MatMultHermitianTranspose_SeqBAIJ, 3044 MatMultHermitianTransposeAdd_SeqBAIJ, 3045 NULL, 3046 /*124*/ NULL, 3047 MatGetColumnReductions_SeqBAIJ, 3048 MatInvertBlockDiagonal_SeqBAIJ, 3049 NULL, 3050 NULL, 3051 /*129*/ NULL, 3052 NULL, 3053 NULL, 3054 NULL, 3055 NULL, 3056 /*134*/ NULL, 3057 NULL, 3058 NULL, 3059 NULL, 3060 NULL, 3061 /*139*/ MatSetBlockSizes_Default, 3062 NULL, 3063 NULL, 3064 MatFDColoringSetUp_SeqXAIJ, 3065 NULL, 3066 /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ, 3067 MatDestroySubMatrices_SeqBAIJ, 3068 NULL, 3069 NULL, 3070 NULL, 3071 NULL, 3072 /*150*/ NULL, 3073 NULL}; 3074 3075 static PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 3076 { 3077 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3078 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3079 3080 PetscFunctionBegin; 3081 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3082 3083 /* allocate space for values if not already there */ 3084 if (!aij->saved_values) PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); 3085 3086 /* copy values over */ 3087 PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz)); 3088 PetscFunctionReturn(PETSC_SUCCESS); 3089 } 3090 3091 static PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 3092 { 3093 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 3094 PetscInt nz = aij->i[aij->mbs] * aij->bs2; 3095 3096 PetscFunctionBegin; 3097 PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 3098 PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first"); 3099 3100 /* copy values over */ 3101 PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz)); 3102 PetscFunctionReturn(PETSC_SUCCESS); 3103 } 3104 3105 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 3106 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *); 3107 3108 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz) 3109 { 3110 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 3111 PetscInt i, mbs, nbs, bs2; 3112 PetscBool flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE; 3113 3114 PetscFunctionBegin; 3115 if (B->hash_active) { 3116 PetscInt bs; 3117 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 3118 PetscCall(PetscHMapIJVDestroy(&b->ht)); 3119 PetscCall(MatGetBlockSize(B, &bs)); 3120 if (bs > 1) PetscCall(PetscHSetIJDestroy(&b->bht)); 3121 PetscCall(PetscFree(b->dnz)); 3122 PetscCall(PetscFree(b->bdnz)); 3123 B->hash_active = PETSC_FALSE; 3124 } 3125 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 3126 if (nz == MAT_SKIP_ALLOCATION) { 3127 skipallocation = PETSC_TRUE; 3128 nz = 0; 3129 } 3130 3131 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 3132 PetscCall(PetscLayoutSetUp(B->rmap)); 3133 PetscCall(PetscLayoutSetUp(B->cmap)); 3134 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3135 3136 B->preallocated = PETSC_TRUE; 3137 3138 mbs = B->rmap->n / bs; 3139 nbs = B->cmap->n / bs; 3140 bs2 = bs * bs; 3141 3142 PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs); 3143 3144 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3145 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz); 3146 if (nnz) { 3147 for (i = 0; i < mbs; i++) { 3148 PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]); 3149 PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs); 3150 } 3151 } 3152 3153 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat"); 3154 PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL)); 3155 PetscOptionsEnd(); 3156 3157 if (!flg) { 3158 switch (bs) { 3159 case 1: 3160 B->ops->mult = MatMult_SeqBAIJ_1; 3161 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 3162 break; 3163 case 2: 3164 B->ops->mult = MatMult_SeqBAIJ_2; 3165 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 3166 break; 3167 case 3: 3168 B->ops->mult = MatMult_SeqBAIJ_3; 3169 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 3170 break; 3171 case 4: 3172 B->ops->mult = MatMult_SeqBAIJ_4; 3173 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 3174 break; 3175 case 5: 3176 B->ops->mult = MatMult_SeqBAIJ_5; 3177 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 3178 break; 3179 case 6: 3180 B->ops->mult = MatMult_SeqBAIJ_6; 3181 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 3182 break; 3183 case 7: 3184 B->ops->mult = MatMult_SeqBAIJ_7; 3185 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 3186 break; 3187 case 9: { 3188 PetscInt version = 1; 3189 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3190 switch (version) { 3191 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3192 case 1: 3193 B->ops->mult = MatMult_SeqBAIJ_9_AVX2; 3194 B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2; 3195 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3196 break; 3197 #endif 3198 default: 3199 B->ops->mult = MatMult_SeqBAIJ_N; 3200 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3201 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3202 break; 3203 } 3204 break; 3205 } 3206 case 11: 3207 B->ops->mult = MatMult_SeqBAIJ_11; 3208 B->ops->multadd = MatMultAdd_SeqBAIJ_11; 3209 break; 3210 case 12: { 3211 PetscInt version = 1; 3212 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3213 switch (version) { 3214 case 1: 3215 B->ops->mult = MatMult_SeqBAIJ_12_ver1; 3216 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3217 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3218 break; 3219 case 2: 3220 B->ops->mult = MatMult_SeqBAIJ_12_ver2; 3221 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2; 3222 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3223 break; 3224 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3225 case 3: 3226 B->ops->mult = MatMult_SeqBAIJ_12_AVX2; 3227 B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1; 3228 PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3229 break; 3230 #endif 3231 default: 3232 B->ops->mult = MatMult_SeqBAIJ_N; 3233 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3234 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3235 break; 3236 } 3237 break; 3238 } 3239 case 15: { 3240 PetscInt version = 1; 3241 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL)); 3242 switch (version) { 3243 case 1: 3244 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 3245 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3246 break; 3247 case 2: 3248 B->ops->mult = MatMult_SeqBAIJ_15_ver2; 3249 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3250 break; 3251 case 3: 3252 B->ops->mult = MatMult_SeqBAIJ_15_ver3; 3253 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3254 break; 3255 case 4: 3256 B->ops->mult = MatMult_SeqBAIJ_15_ver4; 3257 PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs)); 3258 break; 3259 default: 3260 B->ops->mult = MatMult_SeqBAIJ_N; 3261 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3262 break; 3263 } 3264 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3265 break; 3266 } 3267 default: 3268 B->ops->mult = MatMult_SeqBAIJ_N; 3269 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 3270 PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs)); 3271 break; 3272 } 3273 } 3274 B->ops->sor = MatSOR_SeqBAIJ; 3275 b->mbs = mbs; 3276 b->nbs = nbs; 3277 if (!skipallocation) { 3278 if (!b->imax) { 3279 PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen)); 3280 3281 b->free_imax_ilen = PETSC_TRUE; 3282 } 3283 /* b->ilen will count nonzeros in each block row so far. */ 3284 for (i = 0; i < mbs; i++) b->ilen[i] = 0; 3285 if (!nnz) { 3286 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 3287 else if (nz < 0) nz = 1; 3288 nz = PetscMin(nz, nbs); 3289 for (i = 0; i < mbs; i++) b->imax[i] = nz; 3290 PetscCall(PetscIntMultError(nz, mbs, &nz)); 3291 } else { 3292 PetscInt64 nz64 = 0; 3293 for (i = 0; i < mbs; i++) { 3294 b->imax[i] = nnz[i]; 3295 nz64 += nnz[i]; 3296 } 3297 PetscCall(PetscIntCast(nz64, &nz)); 3298 } 3299 3300 /* allocate the matrix space */ 3301 PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i)); 3302 if (B->structure_only) { 3303 PetscCall(PetscMalloc1(nz, &b->j)); 3304 PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i)); 3305 } else { 3306 PetscInt nzbs2 = 0; 3307 PetscCall(PetscIntMultError(nz, bs2, &nzbs2)); 3308 PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i)); 3309 PetscCall(PetscArrayzero(b->a, nz * bs2)); 3310 } 3311 PetscCall(PetscArrayzero(b->j, nz)); 3312 3313 if (B->structure_only) { 3314 b->singlemalloc = PETSC_FALSE; 3315 b->free_a = PETSC_FALSE; 3316 } else { 3317 b->singlemalloc = PETSC_TRUE; 3318 b->free_a = PETSC_TRUE; 3319 } 3320 b->free_ij = PETSC_TRUE; 3321 3322 b->i[0] = 0; 3323 for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1]; 3324 3325 } else { 3326 b->free_a = PETSC_FALSE; 3327 b->free_ij = PETSC_FALSE; 3328 } 3329 3330 b->bs2 = bs2; 3331 b->mbs = mbs; 3332 b->nz = 0; 3333 b->maxnz = nz; 3334 B->info.nz_unneeded = (PetscReal)b->maxnz * bs2; 3335 B->was_assembled = PETSC_FALSE; 3336 B->assembled = PETSC_FALSE; 3337 if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 3338 PetscFunctionReturn(PETSC_SUCCESS); 3339 } 3340 3341 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 3342 { 3343 PetscInt i, m, nz, nz_max = 0, *nnz; 3344 PetscScalar *values = NULL; 3345 PetscBool roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented; 3346 3347 PetscFunctionBegin; 3348 PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 3349 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 3350 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 3351 PetscCall(PetscLayoutSetUp(B->rmap)); 3352 PetscCall(PetscLayoutSetUp(B->cmap)); 3353 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 3354 m = B->rmap->n / bs; 3355 3356 PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 3357 PetscCall(PetscMalloc1(m + 1, &nnz)); 3358 for (i = 0; i < m; i++) { 3359 nz = ii[i + 1] - ii[i]; 3360 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 3361 nz_max = PetscMax(nz_max, nz); 3362 nnz[i] = nz; 3363 } 3364 PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz)); 3365 PetscCall(PetscFree(nnz)); 3366 3367 values = (PetscScalar *)V; 3368 if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values)); 3369 for (i = 0; i < m; i++) { 3370 PetscInt ncols = ii[i + 1] - ii[i]; 3371 const PetscInt *icols = jj + ii[i]; 3372 if (bs == 1 || !roworiented) { 3373 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 3374 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES)); 3375 } else { 3376 PetscInt j; 3377 for (j = 0; j < ncols; j++) { 3378 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 3379 PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES)); 3380 } 3381 } 3382 } 3383 if (!V) PetscCall(PetscFree(values)); 3384 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3385 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3386 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3387 PetscFunctionReturn(PETSC_SUCCESS); 3388 } 3389 3390 /*@C 3391 MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored 3392 3393 Not Collective 3394 3395 Input Parameter: 3396 . mat - a `MATSEQBAIJ` matrix 3397 3398 Output Parameter: 3399 . array - pointer to the data 3400 3401 Level: intermediate 3402 3403 .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3404 @*/ 3405 PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array) 3406 { 3407 PetscFunctionBegin; 3408 PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 3412 /*@C 3413 MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()` 3414 3415 Not Collective 3416 3417 Input Parameters: 3418 + mat - a `MATSEQBAIJ` matrix 3419 - array - pointer to the data 3420 3421 Level: intermediate 3422 3423 .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()` 3424 @*/ 3425 PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array) 3426 { 3427 PetscFunctionBegin; 3428 PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array)); 3429 PetscFunctionReturn(PETSC_SUCCESS); 3430 } 3431 3432 /*MC 3433 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3434 block sparse compressed row format. 3435 3436 Options Database Keys: 3437 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3438 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 3439 3440 Level: beginner 3441 3442 Notes: 3443 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 3444 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 3445 3446 Run with -info to see what version of the matrix-vector product is being used 3447 3448 .seealso: `MatCreateSeqBAIJ()` 3449 M*/ 3450 3451 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *); 3452 3453 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3454 { 3455 PetscMPIInt size; 3456 Mat_SeqBAIJ *b; 3457 3458 PetscFunctionBegin; 3459 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 3460 PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1"); 3461 3462 PetscCall(PetscNew(&b)); 3463 B->data = (void *)b; 3464 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 3465 3466 b->row = NULL; 3467 b->col = NULL; 3468 b->icol = NULL; 3469 b->reallocs = 0; 3470 b->saved_values = NULL; 3471 3472 b->roworiented = PETSC_TRUE; 3473 b->nonew = 0; 3474 b->diag = NULL; 3475 B->spptr = NULL; 3476 B->info.nz_unneeded = (PetscReal)b->maxnz * b->bs2; 3477 b->keepnonzeropattern = PETSC_FALSE; 3478 3479 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ)); 3480 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ)); 3481 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ)); 3482 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ)); 3483 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ)); 3484 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ)); 3485 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ)); 3486 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ)); 3487 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ)); 3488 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ)); 3489 #if defined(PETSC_HAVE_HYPRE) 3490 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE)); 3491 #endif 3492 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS)); 3493 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ)); 3494 PetscFunctionReturn(PETSC_SUCCESS); 3495 } 3496 3497 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace) 3498 { 3499 Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data; 3500 PetscInt i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2; 3501 3502 PetscFunctionBegin; 3503 PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix"); 3504 3505 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3506 c->imax = a->imax; 3507 c->ilen = a->ilen; 3508 c->free_imax_ilen = PETSC_FALSE; 3509 } else { 3510 PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen)); 3511 for (i = 0; i < mbs; i++) { 3512 c->imax[i] = a->imax[i]; 3513 c->ilen[i] = a->ilen[i]; 3514 } 3515 c->free_imax_ilen = PETSC_TRUE; 3516 } 3517 3518 /* allocate the matrix space */ 3519 if (mallocmatspace) { 3520 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3521 PetscCall(PetscCalloc1(bs2 * nz, &c->a)); 3522 3523 c->i = a->i; 3524 c->j = a->j; 3525 c->singlemalloc = PETSC_FALSE; 3526 c->free_a = PETSC_TRUE; 3527 c->free_ij = PETSC_FALSE; 3528 c->parent = A; 3529 C->preallocated = PETSC_TRUE; 3530 C->assembled = PETSC_TRUE; 3531 3532 PetscCall(PetscObjectReference((PetscObject)A)); 3533 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3534 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3535 } else { 3536 PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i)); 3537 3538 c->singlemalloc = PETSC_TRUE; 3539 c->free_a = PETSC_TRUE; 3540 c->free_ij = PETSC_TRUE; 3541 3542 PetscCall(PetscArraycpy(c->i, a->i, mbs + 1)); 3543 if (mbs > 0) { 3544 PetscCall(PetscArraycpy(c->j, a->j, nz)); 3545 if (cpvalues == MAT_COPY_VALUES) { 3546 PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz)); 3547 } else { 3548 PetscCall(PetscArrayzero(c->a, bs2 * nz)); 3549 } 3550 } 3551 C->preallocated = PETSC_TRUE; 3552 C->assembled = PETSC_TRUE; 3553 } 3554 } 3555 3556 c->roworiented = a->roworiented; 3557 c->nonew = a->nonew; 3558 3559 PetscCall(PetscLayoutReference(A->rmap, &C->rmap)); 3560 PetscCall(PetscLayoutReference(A->cmap, &C->cmap)); 3561 3562 c->bs2 = a->bs2; 3563 c->mbs = a->mbs; 3564 c->nbs = a->nbs; 3565 3566 if (a->diag) { 3567 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3568 c->diag = a->diag; 3569 c->free_diag = PETSC_FALSE; 3570 } else { 3571 PetscCall(PetscMalloc1(mbs + 1, &c->diag)); 3572 for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i]; 3573 c->free_diag = PETSC_TRUE; 3574 } 3575 } else c->diag = NULL; 3576 3577 c->nz = a->nz; 3578 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3579 c->solve_work = NULL; 3580 c->mult_work = NULL; 3581 c->sor_workt = NULL; 3582 c->sor_work = NULL; 3583 3584 c->compressedrow.use = a->compressedrow.use; 3585 c->compressedrow.nrows = a->compressedrow.nrows; 3586 if (a->compressedrow.use) { 3587 i = a->compressedrow.nrows; 3588 PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex)); 3589 PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1)); 3590 PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i)); 3591 } else { 3592 c->compressedrow.use = PETSC_FALSE; 3593 c->compressedrow.i = NULL; 3594 c->compressedrow.rindex = NULL; 3595 } 3596 C->nonzerostate = A->nonzerostate; 3597 3598 PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist)); 3599 PetscFunctionReturn(PETSC_SUCCESS); 3600 } 3601 3602 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B) 3603 { 3604 PetscFunctionBegin; 3605 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B)); 3606 PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n)); 3607 PetscCall(MatSetType(*B, MATSEQBAIJ)); 3608 PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE)); 3609 PetscFunctionReturn(PETSC_SUCCESS); 3610 } 3611 3612 /* Used for both SeqBAIJ and SeqSBAIJ matrices */ 3613 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer) 3614 { 3615 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3616 PetscInt *rowidxs, *colidxs; 3617 PetscScalar *matvals; 3618 3619 PetscFunctionBegin; 3620 PetscCall(PetscViewerSetUp(viewer)); 3621 3622 /* read matrix header */ 3623 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3624 PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3625 M = header[1]; 3626 N = header[2]; 3627 nz = header[3]; 3628 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3629 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3630 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ"); 3631 3632 /* set block sizes from the viewer's .info file */ 3633 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3634 /* set local and global sizes if not set already */ 3635 if (mat->rmap->n < 0) mat->rmap->n = M; 3636 if (mat->cmap->n < 0) mat->cmap->n = N; 3637 if (mat->rmap->N < 0) mat->rmap->N = M; 3638 if (mat->cmap->N < 0) mat->cmap->N = N; 3639 PetscCall(PetscLayoutSetUp(mat->rmap)); 3640 PetscCall(PetscLayoutSetUp(mat->cmap)); 3641 3642 /* check if the matrix sizes are correct */ 3643 PetscCall(MatGetSize(mat, &rows, &cols)); 3644 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3645 PetscCall(MatGetBlockSize(mat, &bs)); 3646 PetscCall(MatGetLocalSize(mat, &m, &n)); 3647 mbs = m / bs; 3648 nbs = n / bs; 3649 3650 /* read in row lengths, column indices and nonzero values */ 3651 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3652 PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT)); 3653 rowidxs[0] = 0; 3654 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3655 sum = rowidxs[m]; 3656 PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3657 3658 /* read in column indices and nonzero values */ 3659 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals)); 3660 PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT)); 3661 PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR)); 3662 3663 { /* preallocate matrix storage */ 3664 PetscBT bt; /* helper bit set to count nonzeros */ 3665 PetscInt *nnz; 3666 PetscBool sbaij; 3667 3668 PetscCall(PetscBTCreate(nbs, &bt)); 3669 PetscCall(PetscCalloc1(mbs, &nnz)); 3670 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij)); 3671 for (i = 0; i < mbs; i++) { 3672 PetscCall(PetscBTMemzero(nbs, bt)); 3673 for (k = 0; k < bs; k++) { 3674 PetscInt row = bs * i + k; 3675 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3676 PetscInt col = colidxs[j]; 3677 if (!sbaij || col >= row) 3678 if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++; 3679 } 3680 } 3681 } 3682 PetscCall(PetscBTDestroy(&bt)); 3683 PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz)); 3684 PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz)); 3685 PetscCall(PetscFree(nnz)); 3686 } 3687 3688 /* store matrix values */ 3689 for (i = 0; i < m; i++) { 3690 PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1]; 3691 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3692 } 3693 3694 PetscCall(PetscFree(rowidxs)); 3695 PetscCall(PetscFree2(colidxs, matvals)); 3696 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3697 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3698 PetscFunctionReturn(PETSC_SUCCESS); 3699 } 3700 3701 PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer) 3702 { 3703 PetscBool isbinary; 3704 3705 PetscFunctionBegin; 3706 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3707 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3708 PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer)); 3709 PetscFunctionReturn(PETSC_SUCCESS); 3710 } 3711 3712 /*@C 3713 MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block 3714 compressed row) format. For good matrix assembly performance the 3715 user should preallocate the matrix storage by setting the parameter nz 3716 (or the array nnz). By setting these parameters accurately, performance 3717 during matrix assembly can be increased by more than a factor of 50. 3718 3719 Collective 3720 3721 Input Parameters: 3722 + comm - MPI communicator, set to `PETSC_COMM_SELF` 3723 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3724 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3725 . m - number of rows 3726 . n - number of columns 3727 . nz - number of nonzero blocks per block row (same for all rows) 3728 - nnz - array containing the number of nonzero blocks in the various block rows 3729 (possibly different for each block row) or NULL 3730 3731 Output Parameter: 3732 . A - the matrix 3733 3734 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3735 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3736 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 3737 3738 Options Database Keys: 3739 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3740 - -mat_block_size - size of the blocks to use 3741 3742 Level: intermediate 3743 3744 Notes: 3745 The number of rows and columns must be divisible by blocksize. 3746 3747 If the nnz parameter is given then the nz parameter is ignored 3748 3749 A nonzero block is any block that as 1 or more nonzeros in it 3750 3751 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3752 storage. That is, the stored row and column indices can begin at 3753 either one (as in Fortran) or zero. See the users' manual for details. 3754 3755 Specify the preallocated storage with either nz or nnz (not both). 3756 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3757 allocation. See [Sparse Matrices](sec_matsparse) for details. 3758 matrices. 3759 3760 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 3761 @*/ 3762 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A) 3763 { 3764 PetscFunctionBegin; 3765 PetscCall(MatCreate(comm, A)); 3766 PetscCall(MatSetSizes(*A, m, n, m, n)); 3767 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3768 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz)); 3769 PetscFunctionReturn(PETSC_SUCCESS); 3770 } 3771 3772 /*@C 3773 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3774 per row in the matrix. For good matrix assembly performance the 3775 user should preallocate the matrix storage by setting the parameter nz 3776 (or the array nnz). By setting these parameters accurately, performance 3777 during matrix assembly can be increased by more than a factor of 50. 3778 3779 Collective 3780 3781 Input Parameters: 3782 + B - the matrix 3783 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3784 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3785 . nz - number of block nonzeros per block row (same for all rows) 3786 - nnz - array containing the number of block nonzeros in the various block rows 3787 (possibly different for each block row) or NULL 3788 3789 Options Database Keys: 3790 + -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower) 3791 - -mat_block_size - size of the blocks to use 3792 3793 Level: intermediate 3794 3795 Notes: 3796 If the nnz parameter is given then the nz parameter is ignored 3797 3798 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3799 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3800 You can also run with the option -info and look for messages with the string 3801 malloc in them to see if additional memory allocation was needed. 3802 3803 The `MATSEQBAIJ` format is fully compatible with standard Fortran 77 3804 storage. That is, the stored row and column indices can begin at 3805 either one (as in Fortran) or zero. See the users' manual for details. 3806 3807 Specify the preallocated storage with either nz or nnz (not both). 3808 Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory 3809 allocation. See [Sparse Matrices](sec_matsparse) for details. 3810 3811 .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()` 3812 @*/ 3813 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[]) 3814 { 3815 PetscFunctionBegin; 3816 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3817 PetscValidType(B, 1); 3818 PetscValidLogicalCollectiveInt(B, bs, 2); 3819 PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz)); 3820 PetscFunctionReturn(PETSC_SUCCESS); 3821 } 3822 3823 /*@C 3824 MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values 3825 3826 Collective 3827 3828 Input Parameters: 3829 + B - the matrix 3830 . i - the indices into j for the start of each local row (starts with zero) 3831 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3832 - v - optional values in the matrix 3833 3834 Level: advanced 3835 3836 Notes: 3837 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 3838 may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 3839 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3840 `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3841 block column and the second index is over columns within a block. 3842 3843 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 3844 3845 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ` 3846 @*/ 3847 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3848 { 3849 PetscFunctionBegin; 3850 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3851 PetscValidType(B, 1); 3852 PetscValidLogicalCollectiveInt(B, bs, 2); 3853 PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 3854 PetscFunctionReturn(PETSC_SUCCESS); 3855 } 3856 3857 /*@ 3858 MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user. 3859 3860 Collective 3861 3862 Input Parameters: 3863 + comm - must be an MPI communicator of size 1 3864 . bs - size of block 3865 . m - number of rows 3866 . n - number of columns 3867 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix 3868 . j - column indices 3869 - a - matrix values 3870 3871 Output Parameter: 3872 . mat - the matrix 3873 3874 Level: advanced 3875 3876 Notes: 3877 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3878 once the matrix is destroyed 3879 3880 You cannot set new nonzero locations into this matrix, that will generate an error. 3881 3882 The i and j indices are 0 based 3883 3884 When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format 3885 3886 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3887 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3888 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3889 with column-major ordering within blocks. 3890 3891 .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()` 3892 @*/ 3893 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat) 3894 { 3895 Mat_SeqBAIJ *baij; 3896 3897 PetscFunctionBegin; 3898 PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs); 3899 if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3900 3901 PetscCall(MatCreate(comm, mat)); 3902 PetscCall(MatSetSizes(*mat, m, n, m, n)); 3903 PetscCall(MatSetType(*mat, MATSEQBAIJ)); 3904 PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL)); 3905 baij = (Mat_SeqBAIJ *)(*mat)->data; 3906 PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen)); 3907 3908 baij->i = i; 3909 baij->j = j; 3910 baij->a = a; 3911 3912 baij->singlemalloc = PETSC_FALSE; 3913 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3914 baij->free_a = PETSC_FALSE; 3915 baij->free_ij = PETSC_FALSE; 3916 baij->free_imax_ilen = PETSC_TRUE; 3917 3918 for (PetscInt ii = 0; ii < m; ii++) { 3919 const PetscInt row_len = i[ii + 1] - i[ii]; 3920 3921 baij->ilen[ii] = baij->imax[ii] = row_len; 3922 PetscCheck(row_len >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, row_len); 3923 } 3924 if (PetscDefined(USE_DEBUG)) { 3925 for (PetscInt ii = 0; ii < baij->i[m]; ii++) { 3926 PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3927 PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]); 3928 } 3929 } 3930 3931 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3932 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3933 PetscFunctionReturn(PETSC_SUCCESS); 3934 } 3935 3936 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3937 { 3938 PetscFunctionBegin; 3939 PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat)); 3940 PetscFunctionReturn(PETSC_SUCCESS); 3941 } 3942