1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 9 #endif 10 #if defined(PETSC_HAVE_SCALAPACK) 11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 12 #endif 13 14 /* This could be moved to matimpl.h */ 15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) { 16 Mat preallocator; 17 PetscInt r, rstart, rend; 18 PetscInt bs, i, m, n, M, N; 19 PetscBool cong = PETSC_TRUE; 20 21 PetscFunctionBegin; 22 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 23 PetscValidLogicalCollectiveInt(B, nm, 2); 24 for (i = 0; i < nm; i++) { 25 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 26 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 27 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 28 } 29 PetscValidLogicalCollectiveBool(B, fill, 5); 30 PetscCall(MatGetBlockSize(B, &bs)); 31 PetscCall(MatGetSize(B, &M, &N)); 32 PetscCall(MatGetLocalSize(B, &m, &n)); 33 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 34 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 35 PetscCall(MatSetBlockSize(preallocator, bs)); 36 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 37 PetscCall(MatSetUp(preallocator)); 38 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 39 for (r = rstart; r < rend; ++r) { 40 PetscInt ncols; 41 const PetscInt *row; 42 const PetscScalar *vals; 43 44 for (i = 0; i < nm; i++) { 45 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 46 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 47 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 48 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 49 } 50 } 51 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 52 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 53 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 54 PetscCall(MatDestroy(&preallocator)); 55 PetscFunctionReturn(0); 56 } 57 58 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 59 Mat B; 60 PetscInt r; 61 62 PetscFunctionBegin; 63 if (reuse != MAT_REUSE_MATRIX) { 64 PetscBool symm = PETSC_TRUE, isdense; 65 PetscInt bs; 66 67 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 68 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 69 PetscCall(MatSetType(B, newtype)); 70 PetscCall(MatGetBlockSize(A, &bs)); 71 PetscCall(MatSetBlockSize(B, bs)); 72 PetscCall(PetscLayoutSetUp(B->rmap)); 73 PetscCall(PetscLayoutSetUp(B->cmap)); 74 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 75 if (!isdense) { 76 PetscCall(MatGetRowUpperTriangular(A)); 77 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 78 PetscCall(MatRestoreRowUpperTriangular(A)); 79 } else { 80 PetscCall(MatSetUp(B)); 81 } 82 } else { 83 B = *newmat; 84 PetscCall(MatZeroEntries(B)); 85 } 86 87 PetscCall(MatGetRowUpperTriangular(A)); 88 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 89 PetscInt ncols; 90 const PetscInt *row; 91 const PetscScalar *vals; 92 93 PetscCall(MatGetRow(A, r, 
&ncols, &row, &vals)); 94 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 95 #if defined(PETSC_USE_COMPLEX) 96 if (A->hermitian == PETSC_BOOL3_TRUE) { 97 PetscInt i; 98 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 99 } else { 100 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 101 } 102 #else 103 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 104 #endif 105 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 106 } 107 PetscCall(MatRestoreRowUpperTriangular(A)); 108 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 109 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 110 111 if (reuse == MAT_INPLACE_MATRIX) { 112 PetscCall(MatHeaderReplace(A, &B)); 113 } else { 114 *newmat = B; 115 } 116 PetscFunctionReturn(0); 117 } 118 119 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) { 120 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 121 122 PetscFunctionBegin; 123 PetscCall(MatStoreValues(aij->A)); 124 PetscCall(MatStoreValues(aij->B)); 125 PetscFunctionReturn(0); 126 } 127 128 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) { 129 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(0); 135 } 136 137 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 138 { \ 139 brow = row / bs; \ 140 rp = aj + ai[brow]; \ 141 ap = aa + bs2 * ai[brow]; \ 142 rmax = aimax[brow]; \ 143 nrow = ailen[brow]; \ 144 bcol = col / bs; \ 145 ridx = row % bs; \ 146 cidx = col % bs; \ 147 low = 0; \ 148 high = nrow; \ 149 while (high - low > 3) { \ 150 t = (low + high) / 2; \ 151 if (rp[t] > bcol) high = t; \ 152 else low = t; \ 153 } \ 154 for (_i = low; _i < high; _i++) { \ 155 if (rp[_i] > bcol) break; \ 156 if (rp[_i] == bcol) { \ 157 bap = ap + bs2 * _i + bs * cidx + ridx; \ 158 if (addv == ADD_VALUES) *bap += value; \ 159 else *bap = value; \ 160 goto a_noinsert; \ 161 } \ 162 } \ 163 if (a->nonew == 1) goto a_noinsert; \ 164 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 165 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 169 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 170 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 171 rp[_i] = bcol; \ 172 ap[bs2 * _i + bs * cidx + ridx] = value; \ 173 A->nonzerostate++; \ 174 a_noinsert:; \ 175 ailen[brow] = nrow; \ 176 } 177 178 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 179 { \ 180 brow = row / bs; \ 181 rp = bj + bi[brow]; \ 182 ap = ba + bs2 * bi[brow]; \ 183 rmax = bimax[brow]; \ 184 nrow = bilen[brow]; \ 185 bcol = col / bs; \ 186 ridx = row % bs; \ 187 cidx = col % bs; \ 188 low = 0; \ 189 high = nrow; \ 190 while (high - low > 3) { \ 191 t = (low + high) / 2; \ 192 if (rp[t] > bcol) high = t; \ 193 else low = t; \ 194 } \ 195 for (_i = low; _i < high; _i++) { \ 196 if (rp[_i] > bcol) break; \ 197 if (rp[_i] == bcol) { \ 198 bap = ap + bs2 * _i + bs * cidx + ridx; \ 199 if (addv == ADD_VALUES) *bap += value; \ 200 else *bap = value; \ 201 goto b_noinsert; \ 202 } \ 203 } \ 204 if 
(b->nonew == 1) goto b_noinsert; \ 205 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 206 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 207 N = nrow++ - 1; \ 208 /* shift up all the later entries in this row */ \ 209 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 210 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 211 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 212 rp[_i] = bcol; \ 213 ap[bs2 * _i + bs * cidx + ridx] = value; \ 214 B->nonzerostate++; \ 215 b_noinsert:; \ 216 bilen[brow] = nrow; \ 217 } 218 219 /* Only add/insert a(i,j) with i<=j (blocks). 220 Any a(i,j) with i>j input by user is ingored or generates an error 221 */ 222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) { 223 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 224 MatScalar value; 225 PetscBool roworiented = baij->roworiented; 226 PetscInt i, j, row, col; 227 PetscInt rstart_orig = mat->rmap->rstart; 228 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 229 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 230 231 /* Some Variables required in the macro */ 232 Mat A = baij->A; 233 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 234 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 235 MatScalar *aa = a->a; 236 237 Mat B = baij->B; 238 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 239 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 240 MatScalar *ba = b->a; 241 242 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 243 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 244 MatScalar *ap, *bap; 245 246 /* for stash */ 247 PetscInt n_loc, *in_loc = NULL; 248 MatScalar *v_loc = NULL; 249 250 PetscFunctionBegin; 251 if (!baij->donotstash) { 252 if (n > baij->n_loc) { 253 PetscCall(PetscFree(baij->in_loc)); 254 PetscCall(PetscFree(baij->v_loc)); 255 PetscCall(PetscMalloc1(n, &baij->in_loc)); 256 PetscCall(PetscMalloc1(n, &baij->v_loc)); 257 258 baij->n_loc = n; 259 } 260 in_loc = baij->in_loc; 261 v_loc = baij->v_loc; 262 } 263 264 for (i = 0; i < m; i++) { 265 if (im[i] < 0) continue; 266 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 267 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 268 row = im[i] - rstart_orig; /* local row index */ 269 for (j = 0; j < n; j++) { 270 if (im[i] / bs > in[j] / bs) { 271 if (a->ignore_ltriangular) { 272 continue; /* ignore lower triangular blocks */ 273 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 274 } 275 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 276 col = in[j] - cstart_orig; /* local col index */ 277 brow = row / bs; 278 bcol = col / bs; 279 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 280 if (roworiented) value = v[i * n + j]; 281 else value = v[i + j * m]; 282 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 283 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 284 } else if (in[j] < 0) { 285 continue; 286 } else { 287 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 288 /* off-diag entry (B) */ 289 if (mat->was_assembled) { 290 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 291 #if defined(PETSC_USE_CTABLE) 292 PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col)); 293 col = col - 1; 294 #else 295 col = baij->colmap[in[j] / bs] - 1; 296 #endif 297 if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 298 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 299 col = in[j]; 300 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 301 B = baij->B; 302 b = (Mat_SeqBAIJ *)(B)->data; 303 bimax = b->imax; 304 bi = b->i; 305 bilen = b->ilen; 306 bj = b->j; 307 ba = b->a; 308 } else col += in[j] % bs; 309 } else col = in[j]; 310 if (roworiented) value = v[i * n + j]; 311 else value = v[i + j * m]; 312 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 313 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 314 } 315 } 316 } else { /* off processor entry */ 317 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 318 if (!baij->donotstash) { 319 mat->assembled = PETSC_FALSE; 320 n_loc = 0; 321 for (j = 0; j < n; j++) { 322 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 323 in_loc[n_loc] = in[j]; 324 if (roworiented) { 325 v_loc[n_loc] = v[i * n + j]; 326 } else { 327 v_loc[n_loc] = v[j * m + i]; 328 } 329 n_loc++; 330 } 331 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 332 } 333 } 334 } 335 PetscFunctionReturn(0); 336 } 337 338 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) { 339 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 340 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 341 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 342 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 343 PetscBool roworiented = a->roworiented; 344 const PetscScalar *value = v; 345 MatScalar *ap, *aa = a->a, *bap; 346 347 PetscFunctionBegin; 348 if (col < row) { 349 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 350 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 351 } 352 rp = aj + ai[row]; 353 ap = aa + bs2 * ai[row]; 354 rmax = imax[row]; 355 nrow = ailen[row]; 356 value = v; 357 low = 0; 358 high = nrow; 359 360 while (high - low > 7) { 361 t = (low + high) / 2; 362 if (rp[t] > col) high = t; 363 else low = t; 364 } 365 for (i = low; i < high; i++) { 366 if (rp[i] > col) break; 367 if (rp[i] == col) { 368 bap = ap + bs2 * i; 369 if (roworiented) { 370 if (is == ADD_VALUES) { 371 for (ii = 0; ii < bs; ii++) { 372 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 373 } 374 } else { 375 for (ii = 0; ii < bs; ii++) { 376 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 377 } 378 } 379 } else { 380 if (is == ADD_VALUES) { 381 for (ii = 0; ii < bs; ii++) { 382 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 383 } 384 } else { 385 for (ii = 0; ii < bs; ii++) { 386 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 387 } 388 } 389 } 390 goto noinsert2; 391 } 392 } 393 if (nonew == 1) goto noinsert2; 394 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 395 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 396 N = nrow++ - 1; 397 high++; 398 /* shift up all the later entries in this row */ 399 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 400 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 401 rp[i] = col; 402 bap = ap + bs2 * i; 403 if (roworiented) { 404 for (ii = 0; ii < bs; ii++) { 405 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 406 } 407 } else { 408 for (ii = 0; ii < bs; ii++) { 409 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 410 } 411 } 412 noinsert2:; 413 ailen[row] = nrow; 414 PetscFunctionReturn(0); 415 } 416 417 /* 418 This routine is exactly duplicated in mpibaij.c 419 */ 420 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) { 421 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 422 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 423 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 424 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 425 PetscBool roworiented = a->roworiented; 426 const PetscScalar *value = v; 427 MatScalar *ap, *aa = a->a, *bap; 428 429 PetscFunctionBegin; 430 rp = aj + ai[row]; 431 ap = aa + bs2 * ai[row]; 432 rmax = imax[row]; 433 nrow = ailen[row]; 434 low = 0; 435 high = nrow; 436 value = v; 437 while (high - low > 7) { 438 t = (low + high) / 2; 439 if (rp[t] > col) high = t; 440 else low = t; 441 } 442 for (i = low; i < high; i++) { 443 if (rp[i] > col) break; 444 if (rp[i] == col) { 445 bap = ap + bs2 * i; 446 if (roworiented) { 447 if (is == ADD_VALUES) { 448 for (ii = 0; ii < bs; ii++) { 449 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 450 } 451 } else { 452 for (ii = 0; ii < bs; ii++) { 453 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 454 } 455 } 456 } else { 457 if (is == ADD_VALUES) { 458 for (ii = 0; ii < bs; ii++, value += bs) { 459 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 460 bap += bs; 461 } 462 } else { 463 for (ii = 0; ii < bs; ii++, value += bs) { 464 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 465 bap += bs; 466 } 467 } 468 } 469 goto noinsert2; 470 } 471 } 472 
if (nonew == 1) goto noinsert2; 473 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 474 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 475 N = nrow++ - 1; 476 high++; 477 /* shift up all the later entries in this row */ 478 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 479 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 480 rp[i] = col; 481 bap = ap + bs2 * i; 482 if (roworiented) { 483 for (ii = 0; ii < bs; ii++) { 484 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 485 } 486 } else { 487 for (ii = 0; ii < bs; ii++) { 488 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 489 } 490 } 491 noinsert2:; 492 ailen[row] = nrow; 493 PetscFunctionReturn(0); 494 } 495 496 /* 497 This routine could be optimized by removing the need for the block copy below and passing stride information 498 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 499 */ 500 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) { 501 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 502 const MatScalar *value; 503 MatScalar *barray = baij->barray; 504 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 505 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 506 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 507 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 508 509 PetscFunctionBegin; 510 if (!barray) { 511 PetscCall(PetscMalloc1(bs2, &barray)); 512 baij->barray = barray; 513 } 514 515 if (roworiented) { 516 stepval = (n - 1) * bs; 517 } else { 518 stepval = (m - 1) * bs; 519 } 520 for (i = 0; i < m; i++) { 521 if (im[i] < 0) continue; 522 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 523 if (im[i] >= rstart && im[i] < rend) { 524 row = im[i] - rstart; 525 for (j = 0; j < n; j++) { 526 if (im[i] > in[j]) { 527 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 528 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 529 } 530 /* If NumCol = 1 then a copy is not required */ 531 if ((roworiented) && (n == 1)) { 532 barray = (MatScalar *)v + i * bs2; 533 } else if ((!roworiented) && (m == 1)) { 534 barray = (MatScalar *)v + j * bs2; 535 } else { /* Here a copy is required */ 536 if (roworiented) { 537 value = v + i * (stepval + bs) * bs + j * bs; 538 } else { 539 value = v + j * (stepval + bs) * bs + i * bs; 540 } 541 for (ii = 0; ii < bs; ii++, value += stepval) { 542 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 543 } 544 barray -= bs2; 545 } 546 547 if (in[j] >= cstart && in[j] < cend) { 548 col = in[j] - cstart; 549 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 550 } else if (in[j] < 0) { 551 continue; 552 } else { 553 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 554 if (mat->was_assembled) { 555 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 556 557 #if defined(PETSC_USE_DEBUG) 558 #if defined(PETSC_USE_CTABLE) 559 { 560 PetscInt data; 561 PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data)); 562 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 563 } 564 #else 565 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 566 #endif 567 #endif 568 #if defined(PETSC_USE_CTABLE) 569 PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col)); 570 col = (col - 1) / bs; 571 #else 572 col = (baij->colmap[in[j]] - 1) / bs; 573 #endif 574 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 575 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 576 col = in[j]; 577 } 578 } else col = in[j]; 579 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 580 } 581 } 582 } else { 583 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 584 if (!baij->donotstash) { 585 if (roworiented) { 586 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 587 } else { 588 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 589 } 590 } 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) { 597 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 598 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 599 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 600 601 PetscFunctionBegin; 602 for (i = 0; i < m; i++) { 603 if (idxm[i] < 0) continue; /* negative row */ 604 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 605 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 606 row = idxm[i] - bsrstart; 607 for (j = 0; j < n; j++) { 608 if (idxn[j] < 0) continue; /* negative column */ 609 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 610 if (idxn[j] >= 
bscstart && idxn[j] < bscend) { 611 col = idxn[j] - bscstart; 612 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 613 } else { 614 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 615 #if defined(PETSC_USE_CTABLE) 616 PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data)); 617 data--; 618 #else 619 data = baij->colmap[idxn[j] / bs] - 1; 620 #endif 621 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 622 else { 623 col = data + idxn[j] % bs; 624 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 625 } 626 } 627 } 628 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 629 } 630 PetscFunctionReturn(0); 631 } 632 633 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) { 634 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 635 PetscReal sum[2], *lnorm2; 636 637 PetscFunctionBegin; 638 if (baij->size == 1) { 639 PetscCall(MatNorm(baij->A, type, norm)); 640 } else { 641 if (type == NORM_FROBENIUS) { 642 PetscCall(PetscMalloc1(2, &lnorm2)); 643 PetscCall(MatNorm(baij->A, type, lnorm2)); 644 *lnorm2 = (*lnorm2) * (*lnorm2); 645 lnorm2++; /* squar power of norm(A) */ 646 PetscCall(MatNorm(baij->B, type, lnorm2)); 647 *lnorm2 = (*lnorm2) * (*lnorm2); 648 lnorm2--; /* squar power of norm(B) */ 649 PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 650 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 651 PetscCall(PetscFree(lnorm2)); 652 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 653 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 654 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 655 PetscReal *rsum, *rsum2, vabs; 656 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 657 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 658 MatScalar *v; 659 660 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 661 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 662 /* Amat */ 663 v = amat->a; 664 jj = amat->j; 665 for (brow = 0; brow < mbs; brow++) { 666 grow = bs * (rstart + brow); 667 nz = amat->i[brow + 1] - amat->i[brow]; 668 for (bcol = 0; bcol < nz; bcol++) { 669 gcol = bs * (rstart + *jj); 670 jj++; 671 for (col = 0; col < bs; col++) { 672 for (row = 0; row < bs; row++) { 673 vabs = PetscAbsScalar(*v); 674 v++; 675 rsum[gcol + col] += vabs; 676 /* non-diagonal block */ 677 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 678 } 679 } 680 } 681 PetscCall(PetscLogFlops(nz * bs * bs)); 682 } 683 /* Bmat */ 684 v = bmat->a; 685 jj = bmat->j; 686 for (brow = 0; brow < mbs; brow++) { 687 grow = bs * (rstart + brow); 688 nz = bmat->i[brow + 1] - bmat->i[brow]; 689 for (bcol = 0; bcol < nz; bcol++) { 690 gcol = bs * garray[*jj]; 691 jj++; 692 for (col = 0; col < bs; col++) { 693 for (row = 0; row < bs; row++) { 694 vabs = PetscAbsScalar(*v); 695 v++; 696 rsum[gcol + col] += vabs; 697 rsum[grow + row] += vabs; 698 } 699 } 700 } 701 PetscCall(PetscLogFlops(nz * bs * bs)); 702 } 703 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 704 *norm = 0.0; 705 for (col = 0; col < mat->cmap->N; col++) { 706 if (rsum2[col] > *norm) *norm = rsum2[col]; 707 } 708 PetscCall(PetscFree2(rsum, rsum2)); 709 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 710 } 711 PetscFunctionReturn(0); 712 } 713 714 PetscErrorCode 
MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) { 715 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 716 PetscInt nstash, reallocs; 717 718 PetscFunctionBegin; 719 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 720 721 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 722 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 723 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 724 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 725 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 726 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 727 PetscFunctionReturn(0); 728 } 729 730 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) { 731 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 732 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 733 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 734 PetscInt *row, *col; 735 PetscBool other_disassembled; 736 PetscMPIInt n; 737 PetscBool r1, r2, r3; 738 MatScalar *val; 739 740 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 741 PetscFunctionBegin; 742 if (!baij->donotstash && !mat->nooffprocentries) { 743 while (1) { 744 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 745 if (!flg) break; 746 747 for (i = 0; i < n;) { 748 /* Now identify the consecutive vals belonging to the same row */ 749 for (j = i, rstart = row[j]; j < n; j++) { 750 if (row[j] != rstart) break; 751 } 752 if (j < n) ncols = j - i; 753 else ncols = n - i; 754 /* Now assemble all these values with a single function call */ 755 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 756 i = j; 757 } 758 } 759 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 760 /* Now process the block-stash. Since the values are stashed column-oriented, 761 set the roworiented flag to column oriented, and after MatSetValues() 762 restore the original flags */ 763 r1 = baij->roworiented; 764 r2 = a->roworiented; 765 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 766 767 baij->roworiented = PETSC_FALSE; 768 a->roworiented = PETSC_FALSE; 769 770 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 771 while (1) { 772 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 773 if (!flg) break; 774 775 for (i = 0; i < n;) { 776 /* Now identify the consecutive vals belonging to the same row */ 777 for (j = i, rstart = row[j]; j < n; j++) { 778 if (row[j] != rstart) break; 779 } 780 if (j < n) ncols = j - i; 781 else ncols = n - i; 782 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 783 i = j; 784 } 785 } 786 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 787 788 baij->roworiented = r1; 789 a->roworiented = r2; 790 791 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 792 } 793 794 PetscCall(MatAssemblyBegin(baij->A, mode)); 795 PetscCall(MatAssemblyEnd(baij->A, mode)); 796 797 /* determine if any processor has disassembled, if so we must 798 also disassemble ourselves, in order that we may reassemble. 
*/ 799 /* 800 if nonzero structure of submatrix B cannot change then we know that 801 no processor disassembled thus we can skip this stuff 802 */ 803 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 804 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 805 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 806 } 807 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 809 PetscCall(MatAssemblyBegin(baij->B, mode)); 810 PetscCall(MatAssemblyEnd(baij->B, mode)); 811 812 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 813 814 baij->rowvalues = NULL; 815 816 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 817 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 818 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 819 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 820 } 821 PetscFunctionReturn(0); 822 } 823 824 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 825 #include <petscdraw.h> 826 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 827 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 828 PetscInt bs = mat->rmap->bs; 829 PetscMPIInt rank = baij->rank; 830 PetscBool iascii, isdraw; 831 PetscViewer sviewer; 832 PetscViewerFormat format; 833 834 PetscFunctionBegin; 835 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 836 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 837 if (iascii) { 838 PetscCall(PetscViewerGetFormat(viewer, &format)); 839 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 840 MatInfo info; 841 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 842 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 843 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 844 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 845 mat->rmap->bs, (double)info.memory)); 846 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 847 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 848 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 849 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 850 PetscCall(PetscViewerFlush(viewer)); 851 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 852 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 853 PetscCall(VecScatterView(baij->Mvctx, viewer)); 854 PetscFunctionReturn(0); 855 } else if (format == PETSC_VIEWER_ASCII_INFO) { 856 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 857 PetscFunctionReturn(0); 858 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 859 PetscFunctionReturn(0); 860 } 861 } 862 863 if (isdraw) { 864 PetscDraw draw; 865 PetscBool isnull; 866 PetscCall(PetscViewerDrawGetDraw(viewer, 
0, &draw)); 867 PetscCall(PetscDrawIsNull(draw, &isnull)); 868 if (isnull) PetscFunctionReturn(0); 869 } 870 871 { 872 /* assemble the entire matrix onto first processor. */ 873 Mat A; 874 Mat_SeqSBAIJ *Aloc; 875 Mat_SeqBAIJ *Bloc; 876 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 877 MatScalar *a; 878 const char *matname; 879 880 /* Should this be the same type as mat? */ 881 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 882 if (rank == 0) { 883 PetscCall(MatSetSizes(A, M, N, M, N)); 884 } else { 885 PetscCall(MatSetSizes(A, 0, 0, M, N)); 886 } 887 PetscCall(MatSetType(A, MATMPISBAIJ)); 888 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 889 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 890 891 /* copy over the A part */ 892 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 893 ai = Aloc->i; 894 aj = Aloc->j; 895 a = Aloc->a; 896 PetscCall(PetscMalloc1(bs, &rvals)); 897 898 for (i = 0; i < mbs; i++) { 899 rvals[0] = bs * (baij->rstartbs + i); 900 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 901 for (j = ai[i]; j < ai[i + 1]; j++) { 902 col = (baij->cstartbs + aj[j]) * bs; 903 for (k = 0; k < bs; k++) { 904 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 905 col++; 906 a += bs; 907 } 908 } 909 } 910 /* copy over the B part */ 911 Bloc = (Mat_SeqBAIJ *)baij->B->data; 912 ai = Bloc->i; 913 aj = Bloc->j; 914 a = Bloc->a; 915 for (i = 0; i < mbs; i++) { 916 rvals[0] = bs * (baij->rstartbs + i); 917 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 918 for (j = ai[i]; j < ai[i + 1]; j++) { 919 col = baij->garray[aj[j]] * bs; 920 for (k = 0; k < bs; k++) { 921 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 922 col++; 923 a += bs; 924 } 925 } 926 } 927 PetscCall(PetscFree(rvals)); 928 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 929 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 930 /* 931 Everyone has to call to draw the matrix since the graphics waits are 932 synchronized across all processors that share the PetscDraw object 933 */ 934 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 935 PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 936 if (rank == 0) { 937 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 938 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 939 } 940 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 941 PetscCall(PetscViewerFlush(viewer)); 942 PetscCall(MatDestroy(&A)); 943 } 944 PetscFunctionReturn(0); 945 } 946 947 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 948 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 949 950 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) { 951 PetscBool iascii, isdraw, issocket, isbinary; 952 953 PetscFunctionBegin; 954 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 955 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 956 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 957 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 958 if (iascii || isdraw || issocket) { 959 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 960 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 961 PetscFunctionReturn(0); 962 } 963 964 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) { 965 
Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 966 967 PetscFunctionBegin; 968 #if defined(PETSC_USE_LOG) 969 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 970 #endif 971 PetscCall(MatStashDestroy_Private(&mat->stash)); 972 PetscCall(MatStashDestroy_Private(&mat->bstash)); 973 PetscCall(MatDestroy(&baij->A)); 974 PetscCall(MatDestroy(&baij->B)); 975 #if defined(PETSC_USE_CTABLE) 976 PetscCall(PetscTableDestroy(&baij->colmap)); 977 #else 978 PetscCall(PetscFree(baij->colmap)); 979 #endif 980 PetscCall(PetscFree(baij->garray)); 981 PetscCall(VecDestroy(&baij->lvec)); 982 PetscCall(VecScatterDestroy(&baij->Mvctx)); 983 PetscCall(VecDestroy(&baij->slvec0)); 984 PetscCall(VecDestroy(&baij->slvec0b)); 985 PetscCall(VecDestroy(&baij->slvec1)); 986 PetscCall(VecDestroy(&baij->slvec1a)); 987 PetscCall(VecDestroy(&baij->slvec1b)); 988 PetscCall(VecScatterDestroy(&baij->sMvctx)); 989 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 990 PetscCall(PetscFree(baij->barray)); 991 PetscCall(PetscFree(baij->hd)); 992 PetscCall(VecDestroy(&baij->diag)); 993 PetscCall(VecDestroy(&baij->bb1)); 994 PetscCall(VecDestroy(&baij->xx1)); 995 #if defined(PETSC_USE_REAL_MAT_SINGLE) 996 PetscCall(PetscFree(baij->setvaluescopy)); 997 #endif 998 PetscCall(PetscFree(baij->in_loc)); 999 PetscCall(PetscFree(baij->v_loc)); 1000 PetscCall(PetscFree(baij->rangebs)); 1001 PetscCall(PetscFree(mat->data)); 1002 1003 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1004 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 1005 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1006 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 1007 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 1008 #if defined(PETSC_HAVE_ELEMENTAL) 1009 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 1010 #endif 1011 #if defined(PETSC_HAVE_SCALAPACK) 1012 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 1013 #endif 1014 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 1015 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 1016 PetscFunctionReturn(0); 1017 } 1018 1019 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) { 1020 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1021 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1022 PetscScalar *from; 1023 const PetscScalar *x; 1024 1025 PetscFunctionBegin; 1026 /* diagonal part */ 1027 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1028 PetscCall(VecSet(a->slvec1b, 0.0)); 1029 1030 /* subdiagonal part */ 1031 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1032 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1033 1034 /* copy x into the vec slvec0 */ 1035 PetscCall(VecGetArray(a->slvec0, &from)); 1036 PetscCall(VecGetArrayRead(xx, &x)); 1037 1038 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1039 PetscCall(VecRestoreArray(a->slvec0, &from)); 1040 PetscCall(VecRestoreArrayRead(xx, &x)); 1041 1042 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1043 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, 
ADD_VALUES, SCATTER_FORWARD)); 1044 /* supperdiagonal part */ 1045 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1046 PetscFunctionReturn(0); 1047 } 1048 1049 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) { 1050 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1051 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1052 PetscScalar *from; 1053 const PetscScalar *x; 1054 1055 PetscFunctionBegin; 1056 /* diagonal part */ 1057 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1058 PetscCall(VecSet(a->slvec1b, 0.0)); 1059 1060 /* subdiagonal part */ 1061 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1062 1063 /* copy x into the vec slvec0 */ 1064 PetscCall(VecGetArray(a->slvec0, &from)); 1065 PetscCall(VecGetArrayRead(xx, &x)); 1066 1067 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1068 PetscCall(VecRestoreArray(a->slvec0, &from)); 1069 PetscCall(VecRestoreArrayRead(xx, &x)); 1070 1071 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1072 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1073 /* supperdiagonal part */ 1074 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1075 PetscFunctionReturn(0); 1076 } 1077 1078 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) { 1079 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1080 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1081 PetscScalar *from, zero = 0.0; 1082 const PetscScalar *x; 1083 1084 PetscFunctionBegin; 1085 /* diagonal part */ 1086 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1087 PetscCall(VecSet(a->slvec1b, zero)); 1088 1089 /* subdiagonal part */ 1090 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1091 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1092 1093 /* copy x into the vec slvec0 */ 1094 PetscCall(VecGetArray(a->slvec0, &from)); 1095 PetscCall(VecGetArrayRead(xx, &x)); 1096 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1097 PetscCall(VecRestoreArray(a->slvec0, &from)); 1098 1099 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1100 PetscCall(VecRestoreArrayRead(xx, &x)); 1101 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1102 1103 /* supperdiagonal part */ 1104 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1109 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1110 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1111 PetscScalar *from, zero = 0.0; 1112 const PetscScalar *x; 1113 1114 PetscFunctionBegin; 1115 /* diagonal part */ 1116 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1117 PetscCall(VecSet(a->slvec1b, zero)); 1118 1119 /* subdiagonal part */ 1120 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1121 1122 /* copy x into the vec slvec0 */ 1123 PetscCall(VecGetArray(a->slvec0, &from)); 1124 PetscCall(VecGetArrayRead(xx, &x)); 1125 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1126 PetscCall(VecRestoreArray(a->slvec0, &from)); 1127 1128 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1129 PetscCall(VecRestoreArrayRead(xx, &x)); 1130 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1131 1132 /* supperdiagonal part */ 1133 
PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 /* 1138 This only works correctly for square matrices where the subblock A->A is the 1139 diagonal block 1140 */ 1141 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) { 1142 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1143 1144 PetscFunctionBegin; 1145 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1146 PetscCall(MatGetDiagonal(a->A, v)); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) { 1151 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1152 1153 PetscFunctionBegin; 1154 PetscCall(MatScale(a->A, aa)); 1155 PetscCall(MatScale(a->B, aa)); 1156 PetscFunctionReturn(0); 1157 } 1158 1159 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1160 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1161 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1162 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1163 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1164 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1165 1166 PetscFunctionBegin; 1167 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1168 mat->getrowactive = PETSC_TRUE; 1169 1170 if (!mat->rowvalues && (idx || v)) { 1171 /* 1172 allocate enough space to hold information from the longest row. 1173 */ 1174 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1175 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1176 PetscInt max = 1, mbs = mat->mbs, tmp; 1177 for (i = 0; i < mbs; i++) { 1178 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1179 if (max < tmp) max = tmp; 1180 } 1181 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1182 } 1183 1184 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1185 lrow = row - brstart; /* local row index */ 1186 1187 pvA = &vworkA; 1188 pcA = &cworkA; 1189 pvB = &vworkB; 1190 pcB = &cworkB; 1191 if (!v) { 1192 pvA = NULL; 1193 pvB = NULL; 1194 } 1195 if (!idx) { 1196 pcA = NULL; 1197 if (!v) pcB = NULL; 1198 } 1199 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1200 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1201 nztot = nzA + nzB; 1202 1203 cmap = mat->garray; 1204 if (v || idx) { 1205 if (nztot) { 1206 /* Sort by increasing column numbers, assuming A and B already sorted */ 1207 PetscInt imark = -1; 1208 if (v) { 1209 *v = v_p = mat->rowvalues; 1210 for (i = 0; i < nzB; i++) { 1211 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1212 else break; 1213 } 1214 imark = i; 1215 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1216 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1217 } 1218 if (idx) { 1219 *idx = idx_p = mat->rowindices; 1220 if (imark > -1) { 1221 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1222 } else { 1223 for (i = 0; i < nzB; i++) { 1224 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1225 else break; 1226 } 1227 imark = i; 1228 } 1229 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1230 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1231 } 1232 } else { 1233 if (idx) *idx = 
NULL; 1234 if (v) *v = NULL; 1235 } 1236 } 1237 *nz = nztot; 1238 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1239 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1240 PetscFunctionReturn(0); 1241 } 1242 1243 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1244 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1245 1246 PetscFunctionBegin; 1247 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1248 baij->getrowactive = PETSC_FALSE; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) { 1253 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1254 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1255 1256 PetscFunctionBegin; 1257 aA->getrow_utriangular = PETSC_TRUE; 1258 PetscFunctionReturn(0); 1259 } 1260 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) { 1261 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1262 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1263 1264 PetscFunctionBegin; 1265 aA->getrow_utriangular = PETSC_FALSE; 1266 PetscFunctionReturn(0); 1267 } 1268 1269 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) { 1270 PetscFunctionBegin; 1271 if (PetscDefined(USE_COMPLEX)) { 1272 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1273 1274 PetscCall(MatConjugate(a->A)); 1275 PetscCall(MatConjugate(a->B)); 1276 } 1277 PetscFunctionReturn(0); 1278 } 1279 1280 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) { 1281 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1282 1283 PetscFunctionBegin; 1284 PetscCall(MatRealPart(a->A)); 1285 PetscCall(MatRealPart(a->B)); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) { 1290 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1291 1292 PetscFunctionBegin; 1293 PetscCall(MatImaginaryPart(a->A)); 1294 PetscCall(MatImaginaryPart(a->B)); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1299 Input: isrow - distributed(parallel), 1300 iscol_local - locally owned (seq) 1301 */ 1302 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) { 1303 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1304 const PetscInt *ptr1, *ptr2; 1305 1306 PetscFunctionBegin; 1307 PetscCall(ISGetLocalSize(isrow, &sz1)); 1308 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1309 if (sz1 > sz2) { 1310 *flg = PETSC_FALSE; 1311 PetscFunctionReturn(0); 1312 } 1313 1314 PetscCall(ISGetIndices(isrow, &ptr1)); 1315 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1316 1317 PetscCall(PetscMalloc1(sz1, &a1)); 1318 PetscCall(PetscMalloc1(sz2, &a2)); 1319 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1320 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1321 PetscCall(PetscSortInt(sz1, a1)); 1322 PetscCall(PetscSortInt(sz2, a2)); 1323 1324 nmatch = 0; 1325 k = 0; 1326 for (i = 0; i < sz1; i++) { 1327 for (j = k; j < sz2; j++) { 1328 if (a1[i] == a2[j]) { 1329 k = j; 1330 nmatch++; 1331 break; 1332 } 1333 } 1334 } 1335 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1336 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1337 PetscCall(PetscFree(a1)); 1338 PetscCall(PetscFree(a2)); 1339 if (nmatch < sz1) { 1340 *flg = PETSC_FALSE; 1341 } else { 1342 *flg = PETSC_TRUE; 1343 } 1344 PetscFunctionReturn(0); 1345 } 1346 1347 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) { 1348 IS iscol_local; 1349 PetscInt csize; 1350 PetscBool isequal; 1351 
1352 PetscFunctionBegin; 1353 PetscCall(ISGetLocalSize(iscol, &csize)); 1354 if (call == MAT_REUSE_MATRIX) { 1355 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1356 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1357 } else { 1358 PetscBool issorted; 1359 1360 PetscCall(ISAllGather(iscol, &iscol_local)); 1361 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1362 PetscCall(ISSorted(iscol_local, &issorted)); 1363 PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 1364 } 1365 1366 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1367 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1368 if (call == MAT_INITIAL_MATRIX) { 1369 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1370 PetscCall(ISDestroy(&iscol_local)); 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) { 1376 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1377 1378 PetscFunctionBegin; 1379 PetscCall(MatZeroEntries(l->A)); 1380 PetscCall(MatZeroEntries(l->B)); 1381 PetscFunctionReturn(0); 1382 } 1383 1384 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) { 1385 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1386 Mat A = a->A, B = a->B; 1387 PetscLogDouble isend[5], irecv[5]; 1388 1389 PetscFunctionBegin; 1390 info->block_size = (PetscReal)matin->rmap->bs; 1391 1392 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1393 1394 isend[0] = info->nz_used; 1395 isend[1] = info->nz_allocated; 1396 isend[2] = info->nz_unneeded; 1397 isend[3] = info->memory; 1398 isend[4] = info->mallocs; 1399 1400 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1401 1402 isend[0] += info->nz_used; 1403 isend[1] += info->nz_allocated; 1404 isend[2] += info->nz_unneeded; 1405 isend[3] += info->memory; 1406 isend[4] += info->mallocs; 1407 if (flag == MAT_LOCAL) { 1408 info->nz_used = isend[0]; 1409 info->nz_allocated = isend[1]; 1410 info->nz_unneeded = isend[2]; 1411 info->memory = isend[3]; 1412 info->mallocs = isend[4]; 1413 } else if (flag == MAT_GLOBAL_MAX) { 1414 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1415 1416 info->nz_used = irecv[0]; 1417 info->nz_allocated = irecv[1]; 1418 info->nz_unneeded = irecv[2]; 1419 info->memory = irecv[3]; 1420 info->mallocs = irecv[4]; 1421 } else if (flag == MAT_GLOBAL_SUM) { 1422 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1423 1424 info->nz_used = irecv[0]; 1425 info->nz_allocated = irecv[1]; 1426 info->nz_unneeded = irecv[2]; 1427 info->memory = irecv[3]; 1428 info->mallocs = irecv[4]; 1429 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1430 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1431 info->fill_ratio_needed = 0; 1432 info->factor_mallocs = 0; 1433 PetscFunctionReturn(0); 1434 } 1435 1436 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) { 1437 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1438 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1439 1440 PetscFunctionBegin; 1441 switch (op) { 1442 case MAT_NEW_NONZERO_LOCATIONS: 1443 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1444 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1445 case 
MAT_KEEP_NONZERO_PATTERN: 1446 case MAT_SUBMAT_SINGLEIS: 1447 case MAT_NEW_NONZERO_LOCATION_ERR: 1448 MatCheckPreallocated(A, 1); 1449 PetscCall(MatSetOption(a->A, op, flg)); 1450 PetscCall(MatSetOption(a->B, op, flg)); 1451 break; 1452 case MAT_ROW_ORIENTED: 1453 MatCheckPreallocated(A, 1); 1454 a->roworiented = flg; 1455 1456 PetscCall(MatSetOption(a->A, op, flg)); 1457 PetscCall(MatSetOption(a->B, op, flg)); 1458 break; 1459 case MAT_FORCE_DIAGONAL_ENTRIES: 1460 case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break; 1461 case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break; 1462 case MAT_USE_HASH_TABLE: a->ht_flag = flg; break; 1463 case MAT_HERMITIAN: MatCheckPreallocated(A, 1); PetscCall(MatSetOption(a->A, op, flg)); 1464 #if defined(PETSC_USE_COMPLEX) 1465 if (flg) { /* need different mat-vec ops */ 1466 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1467 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1468 A->ops->multtranspose = NULL; 1469 A->ops->multtransposeadd = NULL; 1470 A->symmetric = PETSC_BOOL3_FALSE; 1471 } 1472 #endif 1473 break; 1474 case MAT_SPD: 1475 case MAT_SYMMETRIC: MatCheckPreallocated(A, 1); PetscCall(MatSetOption(a->A, op, flg)); 1476 #if defined(PETSC_USE_COMPLEX) 1477 if (flg) { /* restore to use default mat-vec ops */ 1478 A->ops->mult = MatMult_MPISBAIJ; 1479 A->ops->multadd = MatMultAdd_MPISBAIJ; 1480 A->ops->multtranspose = MatMult_MPISBAIJ; 1481 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1482 } 1483 #endif 1484 break; 1485 case MAT_STRUCTURALLY_SYMMETRIC: 1486 MatCheckPreallocated(A, 1); 1487 PetscCall(MatSetOption(a->A, op, flg)); 1488 break; 1489 case MAT_SYMMETRY_ETERNAL: 1490 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1491 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1492 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1493 break; 1494 case MAT_SPD_ETERNAL: break; 1495 case MAT_IGNORE_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break; 1496 case MAT_ERROR_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break; 1497 case MAT_GETROW_UPPERTRIANGULAR: aA->getrow_utriangular = flg; break; 1498 default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1499 } 1500 PetscFunctionReturn(0); 1501 } 1502 1503 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) { 1504 PetscFunctionBegin; 1505 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1506 if (reuse == MAT_INITIAL_MATRIX) { 1507 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1508 } else if (reuse == MAT_REUSE_MATRIX) { 1509 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) { 1515 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1516 Mat a = baij->A, b = baij->B; 1517 PetscInt nv, m, n; 1518 PetscBool flg; 1519 1520 PetscFunctionBegin; 1521 if (ll != rr) { 1522 PetscCall(VecEqual(ll, rr, &flg)); 1523 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1524 } 1525 if (!ll) PetscFunctionReturn(0); 1526 1527 PetscCall(MatGetLocalSize(mat, &m, &n)); 1528 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1529 1530 PetscCall(VecGetLocalSize(rr, &nv)); 1531 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 
1532 1533 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1534 1535 /* left diagonalscale the off-diagonal part */ 1536 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1537 1538 /* scale the diagonal part */ 1539 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1540 1541 /* right diagonalscale the off-diagonal part */ 1542 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1543 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) { 1548 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1549 1550 PetscFunctionBegin; 1551 PetscCall(MatSetUnfactored(a->A)); 1552 PetscFunctionReturn(0); 1553 } 1554 1555 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1556 1557 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) { 1558 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1559 Mat a, b, c, d; 1560 PetscBool flg; 1561 1562 PetscFunctionBegin; 1563 a = matA->A; 1564 b = matA->B; 1565 c = matB->A; 1566 d = matB->B; 1567 1568 PetscCall(MatEqual(a, c, &flg)); 1569 if (flg) PetscCall(MatEqual(b, d, &flg)); 1570 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1571 PetscFunctionReturn(0); 1572 } 1573 1574 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) { 1575 PetscBool isbaij; 1576 1577 PetscFunctionBegin; 1578 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1579 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1580 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1581 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1582 PetscCall(MatGetRowUpperTriangular(A)); 1583 PetscCall(MatCopy_Basic(A, B, str)); 1584 PetscCall(MatRestoreRowUpperTriangular(A)); 1585 } else { 1586 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1587 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1588 1589 PetscCall(MatCopy(a->A, b->A, str)); 1590 PetscCall(MatCopy(a->B, b->B, str)); 1591 } 1592 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) { 1597 PetscFunctionBegin; 1598 PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 1603 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1604 PetscBLASInt bnz, one = 1; 1605 Mat_SeqSBAIJ *xa, *ya; 1606 Mat_SeqBAIJ *xb, *yb; 1607 1608 PetscFunctionBegin; 1609 if (str == SAME_NONZERO_PATTERN) { 1610 PetscScalar alpha = a; 1611 xa = (Mat_SeqSBAIJ *)xx->A->data; 1612 ya = (Mat_SeqSBAIJ *)yy->A->data; 1613 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1614 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1615 xb = (Mat_SeqBAIJ *)xx->B->data; 1616 yb = (Mat_SeqBAIJ *)yy->B->data; 1617 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1618 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1619 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1620 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1621 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1622 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1623 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1624 } else { 1625 Mat B; 1626 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1627 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1628 PetscCall(MatGetRowUpperTriangular(X)); 1629 PetscCall(MatGetRowUpperTriangular(Y)); 1630 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1631 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1632 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1633 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1634 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1635 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1636 PetscCall(MatSetType(B, MATMPISBAIJ)); 1637 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1638 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1639 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1640 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1641 PetscCall(MatHeaderMerge(Y, &B)); 1642 PetscCall(PetscFree(nnz_d)); 1643 PetscCall(PetscFree(nnz_o)); 1644 PetscCall(MatRestoreRowUpperTriangular(X)); 1645 PetscCall(MatRestoreRowUpperTriangular(Y)); 1646 } 1647 PetscFunctionReturn(0); 1648 } 1649 1650 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) { 1651 PetscInt i; 1652 PetscBool flg; 1653 1654 PetscFunctionBegin; 1655 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1656 for (i = 0; i < n; i++) { 1657 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1658 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1659 } 1660 
PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) { 1664 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1665 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1666 1667 PetscFunctionBegin; 1668 if (!Y->preallocated) { 1669 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1670 } else if (!aij->nz) { 1671 PetscInt nonew = aij->nonew; 1672 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1673 aij->nonew = nonew; 1674 } 1675 PetscCall(MatShift_Basic(Y, a)); 1676 PetscFunctionReturn(0); 1677 } 1678 1679 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 1680 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1681 1682 PetscFunctionBegin; 1683 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1684 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1685 if (d) { 1686 PetscInt rstart; 1687 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1688 *d += rstart / A->rmap->bs; 1689 } 1690 PetscFunctionReturn(0); 1691 } 1692 1693 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) { 1694 PetscFunctionBegin; 1695 *a = ((Mat_MPISBAIJ *)A->data)->A; 1696 PetscFunctionReturn(0); 1697 } 1698 1699 /* -------------------------------------------------------------------*/ 1700 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1701 MatGetRow_MPISBAIJ, 1702 MatRestoreRow_MPISBAIJ, 1703 MatMult_MPISBAIJ, 1704 /* 4*/ MatMultAdd_MPISBAIJ, 1705 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1706 MatMultAdd_MPISBAIJ, 1707 NULL, 1708 NULL, 1709 NULL, 1710 /* 10*/ NULL, 1711 NULL, 1712 NULL, 1713 MatSOR_MPISBAIJ, 1714 MatTranspose_MPISBAIJ, 1715 /* 15*/ MatGetInfo_MPISBAIJ, 1716 MatEqual_MPISBAIJ, 1717 MatGetDiagonal_MPISBAIJ, 1718 MatDiagonalScale_MPISBAIJ, 1719 MatNorm_MPISBAIJ, 1720 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1721 MatAssemblyEnd_MPISBAIJ, 1722 MatSetOption_MPISBAIJ, 1723 MatZeroEntries_MPISBAIJ, 1724 /* 24*/ NULL, 1725 NULL, 1726 NULL, 1727 NULL, 1728 NULL, 1729 /* 29*/ MatSetUp_MPISBAIJ, 1730 NULL, 1731 NULL, 1732 MatGetDiagonalBlock_MPISBAIJ, 1733 NULL, 1734 /* 34*/ MatDuplicate_MPISBAIJ, 1735 NULL, 1736 NULL, 1737 NULL, 1738 NULL, 1739 /* 39*/ MatAXPY_MPISBAIJ, 1740 MatCreateSubMatrices_MPISBAIJ, 1741 MatIncreaseOverlap_MPISBAIJ, 1742 MatGetValues_MPISBAIJ, 1743 MatCopy_MPISBAIJ, 1744 /* 44*/ NULL, 1745 MatScale_MPISBAIJ, 1746 MatShift_MPISBAIJ, 1747 NULL, 1748 NULL, 1749 /* 49*/ NULL, 1750 NULL, 1751 NULL, 1752 NULL, 1753 NULL, 1754 /* 54*/ NULL, 1755 NULL, 1756 MatSetUnfactored_MPISBAIJ, 1757 NULL, 1758 MatSetValuesBlocked_MPISBAIJ, 1759 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1760 NULL, 1761 NULL, 1762 NULL, 1763 NULL, 1764 /* 64*/ NULL, 1765 NULL, 1766 NULL, 1767 NULL, 1768 NULL, 1769 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1770 NULL, 1771 MatConvert_MPISBAIJ_Basic, 1772 NULL, 1773 NULL, 1774 /* 74*/ NULL, 1775 NULL, 1776 NULL, 1777 NULL, 1778 NULL, 1779 /* 79*/ NULL, 1780 NULL, 1781 NULL, 1782 NULL, 1783 MatLoad_MPISBAIJ, 1784 /* 84*/ NULL, 1785 NULL, 1786 NULL, 1787 NULL, 1788 NULL, 1789 /* 89*/ NULL, 1790 NULL, 1791 NULL, 1792 NULL, 1793 NULL, 1794 /* 94*/ NULL, 1795 NULL, 1796 NULL, 1797 NULL, 1798 NULL, 1799 /* 99*/ NULL, 1800 NULL, 1801 NULL, 1802 MatConjugate_MPISBAIJ, 1803 NULL, 1804 /*104*/ NULL, 1805 MatRealPart_MPISBAIJ, 1806 MatImaginaryPart_MPISBAIJ, 1807 MatGetRowUpperTriangular_MPISBAIJ, 1808 MatRestoreRowUpperTriangular_MPISBAIJ, 1809 /*109*/ NULL, 1810 NULL, 1811 NULL, 1812 NULL, 1813 
MatMissingDiagonal_MPISBAIJ, 1814 /*114*/ NULL, 1815 NULL, 1816 NULL, 1817 NULL, 1818 NULL, 1819 /*119*/ NULL, 1820 NULL, 1821 NULL, 1822 NULL, 1823 NULL, 1824 /*124*/ NULL, 1825 NULL, 1826 NULL, 1827 NULL, 1828 NULL, 1829 /*129*/ NULL, 1830 NULL, 1831 NULL, 1832 NULL, 1833 NULL, 1834 /*134*/ NULL, 1835 NULL, 1836 NULL, 1837 NULL, 1838 NULL, 1839 /*139*/ MatSetBlockSizes_Default, 1840 NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1845 NULL, 1846 NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 /*150*/ NULL}; 1851 1852 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) { 1853 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1854 PetscInt i, mbs, Mbs; 1855 PetscMPIInt size; 1856 1857 PetscFunctionBegin; 1858 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1859 PetscCall(PetscLayoutSetUp(B->rmap)); 1860 PetscCall(PetscLayoutSetUp(B->cmap)); 1861 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1862 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1863 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1864 1865 mbs = B->rmap->n / bs; 1866 Mbs = B->rmap->N / bs; 1867 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1868 1869 B->rmap->bs = bs; 1870 b->bs2 = bs * bs; 1871 b->mbs = mbs; 1872 b->Mbs = Mbs; 1873 b->nbs = B->cmap->n / bs; 1874 b->Nbs = B->cmap->N / bs; 1875 1876 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1877 b->rstartbs = B->rmap->rstart / bs; 1878 b->rendbs = B->rmap->rend / bs; 1879 1880 b->cstartbs = B->cmap->rstart / bs; 1881 b->cendbs = B->cmap->rend / bs; 1882 1883 #if defined(PETSC_USE_CTABLE) 1884 PetscCall(PetscTableDestroy(&b->colmap)); 1885 #else 1886 PetscCall(PetscFree(b->colmap)); 1887 #endif 1888 PetscCall(PetscFree(b->garray)); 1889 PetscCall(VecDestroy(&b->lvec)); 1890 PetscCall(VecScatterDestroy(&b->Mvctx)); 1891 PetscCall(VecDestroy(&b->slvec0)); 1892 PetscCall(VecDestroy(&b->slvec0b)); 1893 PetscCall(VecDestroy(&b->slvec1)); 1894 PetscCall(VecDestroy(&b->slvec1a)); 1895 PetscCall(VecDestroy(&b->slvec1b)); 1896 PetscCall(VecScatterDestroy(&b->sMvctx)); 1897 1898 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1899 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1900 PetscCall(MatDestroy(&b->B)); 1901 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1902 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 1903 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1904 1905 if (!B->preallocated) { 1906 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1907 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1908 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1909 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1910 } 1911 1912 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1913 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1914 1915 B->preallocated = PETSC_TRUE; 1916 B->was_assembled = PETSC_FALSE; 1917 B->assembled = PETSC_FALSE; 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 1922 PetscInt m, rstart, cend; 1923 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1924 const PetscInt *JJ = NULL; 1925 PetscScalar *values = NULL; 1926 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1927 PetscBool nooffprocentries; 1928 1929 PetscFunctionBegin; 1930 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1931 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1932 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1933 PetscCall(PetscLayoutSetUp(B->rmap)); 1934 PetscCall(PetscLayoutSetUp(B->cmap)); 1935 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1936 m = B->rmap->n / bs; 1937 rstart = B->rmap->rstart / bs; 1938 cend = B->cmap->rend / bs; 1939 1940 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 1941 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 1942 for (i = 0; i < m; i++) { 1943 nz = ii[i + 1] - ii[i]; 1944 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 1945 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 1946 JJ = jj + ii[i]; 1947 bd = 0; 1948 for (j = 0; j < nz; j++) { 1949 if (*JJ >= i + rstart) break; 1950 JJ++; 1951 bd++; 1952 } 1953 d = 0; 1954 for (; j < nz; j++) { 1955 if (*JJ++ >= cend) break; 1956 d++; 1957 } 1958 d_nnz[i] = d; 1959 o_nnz[i] = nz - d - bd; 1960 nz = nz - bd; 1961 nz_max = PetscMax(nz_max, nz); 1962 } 1963 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 1964 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 1965 PetscCall(PetscFree2(d_nnz, o_nnz)); 1966 1967 values = (PetscScalar *)V; 1968 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 1969 for (i = 0; i < m; i++) { 1970 PetscInt row = i + rstart; 1971 PetscInt ncols = ii[i + 1] - ii[i]; 1972 const PetscInt *icols = jj + ii[i]; 1973 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 1974 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 1975 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 1976 } else { /* block ordering does not match so we can only insert one block at a time. */ 1977 PetscInt j; 1978 for (j = 0; j < ncols; j++) { 1979 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 1980 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 1981 } 1982 } 1983 } 1984 1985 if (!V) PetscCall(PetscFree(values)); 1986 nooffprocentries = B->nooffprocentries; 1987 B->nooffprocentries = PETSC_TRUE; 1988 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1989 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1990 B->nooffprocentries = nooffprocentries; 1991 1992 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1993 PetscFunctionReturn(0); 1994 } 1995 1996 /*MC 1997 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 1998 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 1999 the matrix is stored. 2000 2001 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2002 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2003 2004 Options Database Keys: 2005 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2006 2007 Note: 2008 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2009 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2010 2011 Level: beginner 2012 2013 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2014 M*/ 2015 2016 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) { 2017 Mat_MPISBAIJ *b; 2018 PetscBool flg = PETSC_FALSE; 2019 2020 PetscFunctionBegin; 2021 PetscCall(PetscNew(&b)); 2022 B->data = (void *)b; 2023 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2024 2025 B->ops->destroy = MatDestroy_MPISBAIJ; 2026 B->ops->view = MatView_MPISBAIJ; 2027 B->assembled = PETSC_FALSE; 2028 B->insertmode = NOT_SET_VALUES; 2029 2030 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2031 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2032 2033 /* build local table of row and column ownerships */ 2034 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2035 2036 /* build cache for off array entries formed */ 2037 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2038 2039 b->donotstash = PETSC_FALSE; 2040 b->colmap = NULL; 2041 b->garray = NULL; 2042 b->roworiented = PETSC_TRUE; 2043 2044 /* stuff used in block assembly */ 2045 b->barray = NULL; 2046 2047 /* stuff used for matrix vector multiply */ 2048 b->lvec = NULL; 2049 b->Mvctx = NULL; 2050 b->slvec0 = NULL; 2051 b->slvec0b = NULL; 2052 b->slvec1 = NULL; 2053 b->slvec1a = NULL; 2054 b->slvec1b = NULL; 2055 b->sMvctx = NULL; 2056 2057 /* stuff for MatGetRow() */ 2058 b->rowindices = NULL; 2059 b->rowvalues = NULL; 2060 b->getrowactive = PETSC_FALSE; 2061 2062 /* hash table stuff */ 2063 b->ht = NULL; 2064 b->hd = NULL; 2065 b->ht_size = 0; 2066 b->ht_flag = PETSC_FALSE; 2067 b->ht_fact = 0; 2068 b->ht_total_ct = 0; 2069 b->ht_insert_ct = 0; 2070 2071 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2072 b->ijonly = PETSC_FALSE; 2073 2074 b->in_loc = NULL; 2075 b->v_loc = NULL; 2076 b->n_loc = 0; 2077 2078 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2079 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2080 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2081 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2082 #if defined(PETSC_HAVE_ELEMENTAL) 2083 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2084 #endif 2085 #if defined(PETSC_HAVE_SCALAPACK) 2086 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2087 #endif 2088 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2089 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2090 2091 B->symmetric = PETSC_BOOL3_TRUE; 2092 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2093 B->symmetry_eternal = PETSC_TRUE; 2094 B->structural_symmetry_eternal = PETSC_TRUE; 2095 #if defined(PETSC_USE_COMPLEX) 2096 B->hermitian = PETSC_BOOL3_FALSE; 2097 #else 2098 B->hermitian = PETSC_BOOL3_TRUE; 2099 #endif 2100 2101 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2102 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2103 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2104 if (flg) { 2105 PetscReal fact = 1.39; 2106 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2107 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2108 if (fact <= 1.0) fact = 1.39; 2109 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2110 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2111 } 2112 PetscOptionsEnd(); 2113 PetscFunctionReturn(0); 2114 } 2115 2116 /*MC 2117 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2118 2119 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2120 and `MATMPISBAIJ` otherwise. 2121 2122 Options Database Key: 2123 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2124 2125 Level: beginner 2126 2127 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2128 M*/ 2129 2130 /*@C 2131 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2132 the user should preallocate the matrix storage by setting the parameters 2133 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2134 performance can be increased by more than a factor of 50. 2135 2136 Collective on B 2137 2138 Input Parameters: 2139 + B - the matrix 2140 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2141 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2142 . d_nz - number of block nonzeros per block row in diagonal portion of local 2143 submatrix (same for all local rows) 2144 . d_nnz - array containing the number of block nonzeros in the various block rows 2145 in the upper triangular and diagonal part of the in diagonal portion of the local 2146 (possibly different for each block row) or NULL. 
If you plan to factor the matrix you must leave room 2147 for the diagonal entry and set a value even if it is zero. 2148 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2149 submatrix (same for all local rows). 2150 - o_nnz - array containing the number of nonzeros in the various block rows of the 2151 off-diagonal portion of the local submatrix that is right of the diagonal 2152 (possibly different for each block row) or NULL. 2153 2154 Options Database Keys: 2155 + -mat_no_unroll - uses code that does not unroll the loops in the 2156 block calculations (much slower) 2157 - -mat_block_size - size of the blocks to use 2158 2159 Notes: 2160 2161 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2162 than it must be used on all processors that share the object for that argument. 2163 2164 If the *_nnz parameter is given then the *_nz parameter is ignored 2165 2166 Storage Information: 2167 For a square global matrix we define each processor's diagonal portion 2168 to be its local rows and the corresponding columns (a square submatrix); 2169 each processor's off-diagonal portion encompasses the remainder of the 2170 local matrix (a rectangular submatrix). 2171 2172 The user can specify preallocated storage for the diagonal part of 2173 the local submatrix with either d_nz or d_nnz (not both). Set 2174 d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2175 memory allocation. Likewise, specify preallocated storage for the 2176 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2177 2178 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2179 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2180 You can also run with the option -info and look for messages with the string 2181 malloc in them to see if additional memory allocation was needed. 2182 2183 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2184 the figure below we depict these three local rows and all columns (0-11). 2185 2186 .vb 2187 0 1 2 3 4 5 6 7 8 9 10 11 2188 -------------------------- 2189 row 3 |. . . d d d o o o o o o 2190 row 4 |. . . d d d o o o o o o 2191 row 5 |. . . d d d o o o o o o 2192 -------------------------- 2193 .ve 2194 2195 Thus, any entries in the d locations are stored in the d (diagonal) 2196 submatrix, and any entries in the o locations are stored in the 2197 o (off-diagonal) submatrix. Note that the d matrix is stored in 2198 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2199 2200 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2201 plus the diagonal part of the d matrix, 2202 and o_nz should indicate the number of block nonzeros per row in the o matrix 2203 2204 In general, for PDE problems in which most nonzeros are near the diagonal, 2205 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2206 or you will get TERRIBLE performance; see the users' manual chapter on 2207 matrices. 
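   Example usage (a minimal sketch, not drawn from the PETSc examples; the sizes and
   nonzero counts below are illustrative assumptions only):

.vb
   Mat      A;
   PetscInt bs = 2, m = 10; /* local size must be divisible by the block size */

   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   /* roughly 3 block nonzeros per block row in the upper triangle of the diagonal
      portion and 1 block nonzero per block row in the off-diagonal portion */
   PetscCall(MatMPISBAIJSetPreallocation(A, bs, 3, NULL, 1, NULL));
.ve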
2208 2209 Level: intermediate 2210 2211 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2212 @*/ 2213 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) { 2214 PetscFunctionBegin; 2215 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2216 PetscValidType(B, 1); 2217 PetscValidLogicalCollectiveInt(B, bs, 2); 2218 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2219 PetscFunctionReturn(0); 2220 } 2221 2222 /*@C 2223 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2224 (block compressed row). For good matrix assembly performance 2225 the user should preallocate the matrix storage by setting the parameters 2226 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2227 performance can be increased by more than a factor of 50. 2228 2229 Collective 2230 2231 Input Parameters: 2232 + comm - MPI communicator 2233 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2234 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2235 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2236 This value should be the same as the local size used in creating the 2237 y vector for the matrix-vector product y = Ax. 2238 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2239 This value should be the same as the local size used in creating the 2240 x vector for the matrix-vector product y = Ax. 2241 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2242 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2243 . d_nz - number of block nonzeros per block row in diagonal portion of local 2244 submatrix (same for all local rows) 2245 . d_nnz - array containing the number of block nonzeros in the various block rows 2246 in the upper triangular portion of the in diagonal portion of the local 2247 (possibly different for each block block row) or NULL. 2248 If you plan to factor the matrix you must leave room for the diagonal entry and 2249 set its value even if it is zero. 2250 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2251 submatrix (same for all local rows). 2252 - o_nnz - array containing the number of nonzeros in the various block rows of the 2253 off-diagonal portion of the local submatrix (possibly different for 2254 each block row) or NULL. 2255 2256 Output Parameter: 2257 . A - the matrix 2258 2259 Options Database Keys: 2260 + -mat_no_unroll - uses code that does not unroll the loops in the 2261 block calculations (much slower) 2262 . -mat_block_size - size of the blocks to use 2263 - -mat_mpi - use the parallel matrix data structures even on one processor 2264 (defaults to using SeqBAIJ format on one processor) 2265 2266 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2267 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   Notes:
   The number of rows and columns must be divisible by the block size.
   This matrix type does not support complex Hermitian operations.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o o o
   row 4  |. . . d d d o o o o o o
   row 5  |. . . d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
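   Example usage (a minimal sketch; the block size, local sizes, and d_nz/o_nz values
   below are illustrative assumptions only):

.vb
   Mat A;

   /* 4 block rows per process with 2x2 blocks; preallocate 3 upper-triangular block
      nonzeros per block row in the diagonal portion and 1 in the off-diagonal portion */
   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, 8, 8, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 1, NULL, &A));
   /* ... insert upper-triangular entries with MatSetValuesBlocked() or MatSetValues() ... */
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatDestroy(&A));
.ve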
2318 2319 Level: intermediate 2320 2321 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2322 @*/ 2323 2324 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 2325 PetscMPIInt size; 2326 2327 PetscFunctionBegin; 2328 PetscCall(MatCreate(comm, A)); 2329 PetscCall(MatSetSizes(*A, m, n, M, N)); 2330 PetscCallMPI(MPI_Comm_size(comm, &size)); 2331 if (size > 1) { 2332 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2333 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2334 } else { 2335 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2336 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2337 } 2338 PetscFunctionReturn(0); 2339 } 2340 2341 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) { 2342 Mat mat; 2343 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2344 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2345 PetscScalar *array; 2346 2347 PetscFunctionBegin; 2348 *newmat = NULL; 2349 2350 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2351 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2352 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2353 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2354 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2355 2356 mat->factortype = matin->factortype; 2357 mat->preallocated = PETSC_TRUE; 2358 mat->assembled = PETSC_TRUE; 2359 mat->insertmode = NOT_SET_VALUES; 2360 2361 a = (Mat_MPISBAIJ *)mat->data; 2362 a->bs2 = oldmat->bs2; 2363 a->mbs = oldmat->mbs; 2364 a->nbs = oldmat->nbs; 2365 a->Mbs = oldmat->Mbs; 2366 a->Nbs = oldmat->Nbs; 2367 2368 a->size = oldmat->size; 2369 a->rank = oldmat->rank; 2370 a->donotstash = oldmat->donotstash; 2371 a->roworiented = oldmat->roworiented; 2372 a->rowindices = NULL; 2373 a->rowvalues = NULL; 2374 a->getrowactive = PETSC_FALSE; 2375 a->barray = NULL; 2376 a->rstartbs = oldmat->rstartbs; 2377 a->rendbs = oldmat->rendbs; 2378 a->cstartbs = oldmat->cstartbs; 2379 a->cendbs = oldmat->cendbs; 2380 2381 /* hash table stuff */ 2382 a->ht = NULL; 2383 a->hd = NULL; 2384 a->ht_size = 0; 2385 a->ht_flag = oldmat->ht_flag; 2386 a->ht_fact = oldmat->ht_fact; 2387 a->ht_total_ct = 0; 2388 a->ht_insert_ct = 0; 2389 2390 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2391 if (oldmat->colmap) { 2392 #if defined(PETSC_USE_CTABLE) 2393 PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap)); 2394 #else 2395 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2396 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2397 #endif 2398 } else a->colmap = NULL; 2399 2400 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 2401 PetscCall(PetscMalloc1(len, &a->garray)); 2402 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2403 } else a->garray = NULL; 2404 2405 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2406 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2407 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2408 2409 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2410 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2411 2412 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2413 PetscCall(VecGetArray(a->slvec1, &array)); 2414 
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2415 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2416 PetscCall(VecRestoreArray(a->slvec1, &array)); 2417 PetscCall(VecGetArray(a->slvec0, &array)); 2418 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2419 PetscCall(VecRestoreArray(a->slvec0, &array)); 2420 2421 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2422 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2423 a->sMvctx = oldmat->sMvctx; 2424 2425 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2426 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2427 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2428 *newmat = mat; 2429 PetscFunctionReturn(0); 2430 } 2431 2432 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2433 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2434 2435 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) { 2436 PetscBool isbinary; 2437 2438 PetscFunctionBegin; 2439 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2440 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2441 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) { 2446 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2447 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2448 PetscReal atmp; 2449 PetscReal *work, *svalues, *rvalues; 2450 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2451 PetscMPIInt rank, size; 2452 PetscInt *rowners_bs, dest, count, source; 2453 PetscScalar *va; 2454 MatScalar *ba; 2455 MPI_Status stat; 2456 2457 PetscFunctionBegin; 2458 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2459 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2460 PetscCall(VecGetArray(v, &va)); 2461 2462 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2463 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2464 2465 bs = A->rmap->bs; 2466 mbs = a->mbs; 2467 Mbs = a->Mbs; 2468 ba = b->a; 2469 bi = b->i; 2470 bj = b->j; 2471 2472 /* find ownerships */ 2473 rowners_bs = A->rmap->range; 2474 2475 /* each proc creates an array to be distributed */ 2476 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2477 2478 /* row_max for B */ 2479 if (rank != size - 1) { 2480 for (i = 0; i < mbs; i++) { 2481 ncols = bi[1] - bi[0]; 2482 bi++; 2483 brow = bs * i; 2484 for (j = 0; j < ncols; j++) { 2485 bcol = bs * (*bj); 2486 for (kcol = 0; kcol < bs; kcol++) { 2487 col = bcol + kcol; /* local col index */ 2488 col += rowners_bs[rank + 1]; /* global col index */ 2489 for (krow = 0; krow < bs; krow++) { 2490 atmp = PetscAbsScalar(*ba); 2491 ba++; 2492 row = brow + krow; /* local row index */ 2493 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2494 if (work[col] < atmp) work[col] = atmp; 2495 } 2496 } 2497 bj++; 2498 } 2499 } 2500 2501 /* send values to its owners */ 2502 for (dest = rank + 1; dest < size; dest++) { 2503 svalues = work + rowners_bs[dest]; 2504 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2505 PetscCallMPI(MPI_Send(svalues, count, 
MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2506 } 2507 } 2508 2509 /* receive values */ 2510 if (rank) { 2511 rvalues = work; 2512 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2513 for (source = 0; source < rank; source++) { 2514 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2515 /* process values */ 2516 for (i = 0; i < count; i++) { 2517 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2518 } 2519 } 2520 } 2521 2522 PetscCall(VecRestoreArray(v, &va)); 2523 PetscCall(PetscFree(work)); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 2528 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2529 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2530 PetscScalar *x, *ptr, *from; 2531 Vec bb1; 2532 const PetscScalar *b; 2533 2534 PetscFunctionBegin; 2535 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2536 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2537 2538 if (flag == SOR_APPLY_UPPER) { 2539 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2544 if (flag & SOR_ZERO_INITIAL_GUESS) { 2545 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2546 its--; 2547 } 2548 2549 PetscCall(VecDuplicate(bb, &bb1)); 2550 while (its--) { 2551 /* lower triangular part: slvec0b = - B^T*xx */ 2552 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2553 2554 /* copy xx into slvec0a */ 2555 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2556 PetscCall(VecGetArray(xx, &x)); 2557 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2558 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2559 2560 PetscCall(VecScale(mat->slvec0, -1.0)); 2561 2562 /* copy bb into slvec1a */ 2563 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2564 PetscCall(VecGetArrayRead(bb, &b)); 2565 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2566 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2567 2568 /* set slvec1b = 0 */ 2569 PetscCall(VecSet(mat->slvec1b, 0.0)); 2570 2571 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2572 PetscCall(VecRestoreArray(xx, &x)); 2573 PetscCall(VecRestoreArrayRead(bb, &b)); 2574 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2575 2576 /* upper triangular part: bb1 = bb1 - B*x */ 2577 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2578 2579 /* local diagonal sweep */ 2580 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2581 } 2582 PetscCall(VecDestroy(&bb1)); 2583 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2584 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2585 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2586 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2587 } else if (flag & SOR_EISENSTAT) { 2588 Vec xx1; 2589 PetscBool hasop; 2590 const PetscScalar *diag; 2591 PetscScalar *sl, scale = 
(omega - 2.0) / omega; 2592 PetscInt i, n; 2593 2594 if (!mat->xx1) { 2595 PetscCall(VecDuplicate(bb, &mat->xx1)); 2596 PetscCall(VecDuplicate(bb, &mat->bb1)); 2597 } 2598 xx1 = mat->xx1; 2599 bb1 = mat->bb1; 2600 2601 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2602 2603 if (!mat->diag) { 2604 /* this is wrong for same matrix with new nonzero values */ 2605 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2606 PetscCall(MatGetDiagonal(matin, mat->diag)); 2607 } 2608 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2609 2610 if (hasop) { 2611 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2612 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2613 } else { 2614 /* 2615 These two lines are replaced by code that may be a bit faster for a good compiler 2616 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2617 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2618 */ 2619 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2620 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2621 PetscCall(VecGetArrayRead(bb, &b)); 2622 PetscCall(VecGetArray(xx, &x)); 2623 PetscCall(VecGetLocalSize(xx, &n)); 2624 if (omega == 1.0) { 2625 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2626 PetscCall(PetscLogFlops(2.0 * n)); 2627 } else { 2628 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2629 PetscCall(PetscLogFlops(3.0 * n)); 2630 } 2631 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2632 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2633 PetscCall(VecRestoreArrayRead(bb, &b)); 2634 PetscCall(VecRestoreArray(xx, &x)); 2635 } 2636 2637 /* multiply off-diagonal portion of matrix */ 2638 PetscCall(VecSet(mat->slvec1b, 0.0)); 2639 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2640 PetscCall(VecGetArray(mat->slvec0, &from)); 2641 PetscCall(VecGetArray(xx, &x)); 2642 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2643 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2644 PetscCall(VecRestoreArray(xx, &x)); 2645 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2646 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2647 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2648 2649 /* local sweep */ 2650 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2651 PetscCall(VecAXPY(xx, 1.0, xx1)); 2652 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2653 PetscFunctionReturn(0); 2654 } 2655 2656 /*@ 2657 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2658 CSR format the local rows. 2659 2660 Collective 2661 2662 Input Parameters: 2663 + comm - MPI communicator 2664 . bs - the block size, only a block size of 1 is supported 2665 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2666 . n - This value should be the same as the local size used in creating the 2667 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2668 calculated if N is given) For square matrices n is almost always m. 2669 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2670 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2671 . 
i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in that block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are offsets into the local j array.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(0);
}

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well.

   Any entries below the diagonal are ignored.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
    PetscInt     *dnz, *onz, mbs, Nbs, nbs;
    PetscInt     *bindx, rmax = a->rmax, j;
    PetscMPIInt   rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2788 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2789 } 2790 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2791 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2792 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2793 PetscFunctionReturn(0); 2794 } 2795
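/*
   A minimal usage sketch for MatMPISBAIJSetPreallocationCSR(). The CSR arrays below are
   illustrative assumptions: they describe the upper triangle of a 2x2 matrix with bs = 1,
   owned by a single process, with 0-based, sorted global column indices in each row.

     Mat         B;
     PetscInt    ia[] = {0, 2, 3};        // row 0 has two stored entries, row 1 has one
     PetscInt    ja[] = {0, 1, 1};        // upper-triangular column indices, sorted per row
     PetscScalar va[] = {4.0, -1.0, 4.0}; // optional numerical values

     PetscCall(MatCreate(PETSC_COMM_SELF, &B));
     PetscCall(MatSetSizes(B, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE));
     PetscCall(MatSetType(B, MATMPISBAIJ));
     PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, ia, ja, va));
     PetscCall(MatDestroy(&B));

   The same arrays could equivalently be passed to MatCreateMPISBAIJWithArrays(), which wraps
   the MatCreate()/MatSetSizes()/MatSetType()/MatMPISBAIJSetPreallocationCSR() sequence above.
*/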