#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) {
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp = aj + ai[brow]; \
    ap = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp = bj + bi[brow]; \
    ap = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar value;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macros */
  Mat A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar *aa = a->a;

  Mat B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar *ba = b->a;

  PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col = in[j] - cstart_orig;                     /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp = aj + ai[row];
  ap = aa + bs2 * ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low = 0;
  high = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp = aj + ai[row];
  ap = aa + bs2 * ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low = 0;
  high = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar *barray = baij->barray;
  PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal *rsum, *rsum2, vabs;
      PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscMPIInt n;
  PetscBool r1, r2, r3;
  MatScalar *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs;
  PetscMPIInt rank = baij->rank;
  PetscBool iascii, isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar *a;
    const char *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai = Aloc->i;
    aj = Aloc->j;
    a = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai = Bloc->i;
    aj = Bloc->j;
    a = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) {
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) {
  PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) {
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS iscol_local;
  PetscInt csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) {
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data;
  Mat A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded = isend[2];
    info->memory = isend[3];
    info->mallocs = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_USE_HASH_TABLE: a->ht_flag = flg; break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult = MatMult_MPISBAIJ;
      A->ops->multadd = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL: break;
  case MAT_IGNORE_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break;
  case MAT_ERROR_LOWER_TRIANGULAR: aA->ignore_ltriangular = flg; break;
  case MAT_GETROW_UPPERTRIANGULAR: aA->getrow_utriangular = flg; break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) {
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) {
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat a = baij->A, b = baij->B;
  PetscInt nv, m, n;
  PetscBool flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(0);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1533 1534 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1535 1536 /* left diagonalscale the off-diagonal part */ 1537 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1538 1539 /* scale the diagonal part */ 1540 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1541 1542 /* right diagonalscale the off-diagonal part */ 1543 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1544 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1545 PetscFunctionReturn(0); 1546 } 1547 1548 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) { 1549 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1550 1551 PetscFunctionBegin; 1552 PetscCall(MatSetUnfactored(a->A)); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1557 1558 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) { 1559 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1560 Mat a, b, c, d; 1561 PetscBool flg; 1562 1563 PetscFunctionBegin; 1564 a = matA->A; 1565 b = matA->B; 1566 c = matB->A; 1567 d = matB->B; 1568 1569 PetscCall(MatEqual(a, c, &flg)); 1570 if (flg) PetscCall(MatEqual(b, d, &flg)); 1571 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) { 1576 PetscBool isbaij; 1577 1578 PetscFunctionBegin; 1579 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1580 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1581 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1582 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1583 PetscCall(MatGetRowUpperTriangular(A)); 1584 PetscCall(MatCopy_Basic(A, B, str)); 1585 PetscCall(MatRestoreRowUpperTriangular(A)); 1586 } else { 1587 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1588 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1589 1590 PetscCall(MatCopy(a->A, b->A, str)); 1591 PetscCall(MatCopy(a->B, b->B, str)); 1592 } 1593 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1594 PetscFunctionReturn(0); 1595 } 1596 1597 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) { 1598 PetscFunctionBegin; 1599 PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1600 PetscFunctionReturn(0); 1601 } 1602 1603 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) { 1604 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1605 PetscBLASInt bnz, one = 1; 1606 Mat_SeqSBAIJ *xa, *ya; 1607 Mat_SeqBAIJ *xb, *yb; 1608 1609 PetscFunctionBegin; 1610 if (str == SAME_NONZERO_PATTERN) { 1611 PetscScalar alpha = a; 1612 xa = (Mat_SeqSBAIJ *)xx->A->data; 1613 ya = (Mat_SeqSBAIJ *)yy->A->data; 1614 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1615 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1616 xb = (Mat_SeqBAIJ *)xx->B->data; 1617 yb = (Mat_SeqBAIJ *)yy->B->data; 1618 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1619 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1620 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1621 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1622 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1623 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1624 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1625 } else { 1626 Mat B; 1627 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1628 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1629 PetscCall(MatGetRowUpperTriangular(X)); 1630 PetscCall(MatGetRowUpperTriangular(Y)); 1631 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1632 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1633 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1634 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1635 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1636 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1637 PetscCall(MatSetType(B, MATMPISBAIJ)); 1638 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1639 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1640 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1641 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1642 PetscCall(MatHeaderMerge(Y, &B)); 1643 PetscCall(PetscFree(nnz_d)); 1644 PetscCall(PetscFree(nnz_o)); 1645 PetscCall(MatRestoreRowUpperTriangular(X)); 1646 PetscCall(MatRestoreRowUpperTriangular(Y)); 1647 } 1648 PetscFunctionReturn(0); 1649 } 1650 1651 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) { 1652 PetscInt i; 1653 PetscBool flg; 1654 1655 PetscFunctionBegin; 1656 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1657 for (i = 0; i < n; i++) { 1658 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1659 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1660 } 1661 
PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) { 1665 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1666 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1667 1668 PetscFunctionBegin; 1669 if (!Y->preallocated) { 1670 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1671 } else if (!aij->nz) { 1672 PetscInt nonew = aij->nonew; 1673 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1674 aij->nonew = nonew; 1675 } 1676 PetscCall(MatShift_Basic(Y, a)); 1677 PetscFunctionReturn(0); 1678 } 1679 1680 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 1681 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1682 1683 PetscFunctionBegin; 1684 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1685 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1686 if (d) { 1687 PetscInt rstart; 1688 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1689 *d += rstart / A->rmap->bs; 1690 } 1691 PetscFunctionReturn(0); 1692 } 1693 1694 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) { 1695 PetscFunctionBegin; 1696 *a = ((Mat_MPISBAIJ *)A->data)->A; 1697 PetscFunctionReturn(0); 1698 } 1699 1700 /* -------------------------------------------------------------------*/ 1701 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1702 MatGetRow_MPISBAIJ, 1703 MatRestoreRow_MPISBAIJ, 1704 MatMult_MPISBAIJ, 1705 /* 4*/ MatMultAdd_MPISBAIJ, 1706 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1707 MatMultAdd_MPISBAIJ, 1708 NULL, 1709 NULL, 1710 NULL, 1711 /* 10*/ NULL, 1712 NULL, 1713 NULL, 1714 MatSOR_MPISBAIJ, 1715 MatTranspose_MPISBAIJ, 1716 /* 15*/ MatGetInfo_MPISBAIJ, 1717 MatEqual_MPISBAIJ, 1718 MatGetDiagonal_MPISBAIJ, 1719 MatDiagonalScale_MPISBAIJ, 1720 MatNorm_MPISBAIJ, 1721 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1722 MatAssemblyEnd_MPISBAIJ, 1723 MatSetOption_MPISBAIJ, 1724 MatZeroEntries_MPISBAIJ, 1725 /* 24*/ NULL, 1726 NULL, 1727 NULL, 1728 NULL, 1729 NULL, 1730 /* 29*/ MatSetUp_MPISBAIJ, 1731 NULL, 1732 NULL, 1733 MatGetDiagonalBlock_MPISBAIJ, 1734 NULL, 1735 /* 34*/ MatDuplicate_MPISBAIJ, 1736 NULL, 1737 NULL, 1738 NULL, 1739 NULL, 1740 /* 39*/ MatAXPY_MPISBAIJ, 1741 MatCreateSubMatrices_MPISBAIJ, 1742 MatIncreaseOverlap_MPISBAIJ, 1743 MatGetValues_MPISBAIJ, 1744 MatCopy_MPISBAIJ, 1745 /* 44*/ NULL, 1746 MatScale_MPISBAIJ, 1747 MatShift_MPISBAIJ, 1748 NULL, 1749 NULL, 1750 /* 49*/ NULL, 1751 NULL, 1752 NULL, 1753 NULL, 1754 NULL, 1755 /* 54*/ NULL, 1756 NULL, 1757 MatSetUnfactored_MPISBAIJ, 1758 NULL, 1759 MatSetValuesBlocked_MPISBAIJ, 1760 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1761 NULL, 1762 NULL, 1763 NULL, 1764 NULL, 1765 /* 64*/ NULL, 1766 NULL, 1767 NULL, 1768 NULL, 1769 NULL, 1770 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1771 NULL, 1772 MatConvert_MPISBAIJ_Basic, 1773 NULL, 1774 NULL, 1775 /* 74*/ NULL, 1776 NULL, 1777 NULL, 1778 NULL, 1779 NULL, 1780 /* 79*/ NULL, 1781 NULL, 1782 NULL, 1783 NULL, 1784 MatLoad_MPISBAIJ, 1785 /* 84*/ NULL, 1786 NULL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 /* 89*/ NULL, 1791 NULL, 1792 NULL, 1793 NULL, 1794 NULL, 1795 /* 94*/ NULL, 1796 NULL, 1797 NULL, 1798 NULL, 1799 NULL, 1800 /* 99*/ NULL, 1801 NULL, 1802 NULL, 1803 MatConjugate_MPISBAIJ, 1804 NULL, 1805 /*104*/ NULL, 1806 MatRealPart_MPISBAIJ, 1807 MatImaginaryPart_MPISBAIJ, 1808 MatGetRowUpperTriangular_MPISBAIJ, 1809 MatRestoreRowUpperTriangular_MPISBAIJ, 1810 /*109*/ NULL, 1811 NULL, 1812 NULL, 1813 NULL, 1814 
MatMissingDiagonal_MPISBAIJ, 1815 /*114*/ NULL, 1816 NULL, 1817 NULL, 1818 NULL, 1819 NULL, 1820 /*119*/ NULL, 1821 NULL, 1822 NULL, 1823 NULL, 1824 NULL, 1825 /*124*/ NULL, 1826 NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 /*129*/ NULL, 1831 NULL, 1832 NULL, 1833 NULL, 1834 NULL, 1835 /*134*/ NULL, 1836 NULL, 1837 NULL, 1838 NULL, 1839 NULL, 1840 /*139*/ MatSetBlockSizes_Default, 1841 NULL, 1842 NULL, 1843 NULL, 1844 NULL, 1845 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1846 NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 NULL, 1851 /*150*/ NULL}; 1852 1853 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) { 1854 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1855 PetscInt i, mbs, Mbs; 1856 PetscMPIInt size; 1857 1858 PetscFunctionBegin; 1859 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1860 PetscCall(PetscLayoutSetUp(B->rmap)); 1861 PetscCall(PetscLayoutSetUp(B->cmap)); 1862 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1863 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1864 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1865 1866 mbs = B->rmap->n / bs; 1867 Mbs = B->rmap->N / bs; 1868 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1869 1870 B->rmap->bs = bs; 1871 b->bs2 = bs * bs; 1872 b->mbs = mbs; 1873 b->Mbs = Mbs; 1874 b->nbs = B->cmap->n / bs; 1875 b->Nbs = B->cmap->N / bs; 1876 1877 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1878 b->rstartbs = B->rmap->rstart / bs; 1879 b->rendbs = B->rmap->rend / bs; 1880 1881 b->cstartbs = B->cmap->rstart / bs; 1882 b->cendbs = B->cmap->rend / bs; 1883 1884 #if defined(PETSC_USE_CTABLE) 1885 PetscCall(PetscTableDestroy(&b->colmap)); 1886 #else 1887 PetscCall(PetscFree(b->colmap)); 1888 #endif 1889 PetscCall(PetscFree(b->garray)); 1890 PetscCall(VecDestroy(&b->lvec)); 1891 PetscCall(VecScatterDestroy(&b->Mvctx)); 1892 PetscCall(VecDestroy(&b->slvec0)); 1893 PetscCall(VecDestroy(&b->slvec0b)); 1894 PetscCall(VecDestroy(&b->slvec1)); 1895 PetscCall(VecDestroy(&b->slvec1a)); 1896 PetscCall(VecDestroy(&b->slvec1b)); 1897 PetscCall(VecScatterDestroy(&b->sMvctx)); 1898 1899 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1900 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1901 PetscCall(MatDestroy(&b->B)); 1902 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1903 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 1904 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1905 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B)); 1906 1907 if (!B->preallocated) { 1908 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1909 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1910 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1911 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A)); 1912 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1913 } 1914 1915 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1916 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1917 1918 B->preallocated = PETSC_TRUE; 1919 B->was_assembled = PETSC_FALSE; 1920 B->assembled = PETSC_FALSE; 1921 PetscFunctionReturn(0); 1922 } 1923 1924 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 1925 PetscInt m, rstart, cend; 1926 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1927 const PetscInt *JJ = NULL; 1928 PetscScalar *values = NULL; 1929 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1930 PetscBool nooffprocentries; 1931 1932 PetscFunctionBegin; 1933 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1934 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1935 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1936 PetscCall(PetscLayoutSetUp(B->rmap)); 1937 PetscCall(PetscLayoutSetUp(B->cmap)); 1938 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1939 m = B->rmap->n / bs; 1940 rstart = B->rmap->rstart / bs; 1941 cend = B->cmap->rend / bs; 1942 1943 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 1944 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 1945 for (i = 0; i < m; i++) { 1946 nz = ii[i + 1] - ii[i]; 1947 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 1948 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 1949 JJ = jj + ii[i]; 1950 bd = 0; 1951 for (j = 0; j < nz; j++) { 1952 if (*JJ >= i + rstart) break; 1953 JJ++; 1954 bd++; 1955 } 1956 d = 0; 1957 for (; j < nz; j++) { 1958 if (*JJ++ >= cend) break; 1959 d++; 1960 } 1961 d_nnz[i] = d; 1962 o_nnz[i] = nz - d - bd; 1963 nz = nz - bd; 1964 nz_max = PetscMax(nz_max, nz); 1965 } 1966 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 1967 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 1968 PetscCall(PetscFree2(d_nnz, o_nnz)); 1969 1970 values = (PetscScalar *)V; 1971 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 1972 for (i = 0; i < m; i++) { 1973 PetscInt row = i + rstart; 1974 PetscInt ncols = ii[i + 1] - ii[i]; 1975 const PetscInt *icols = jj + ii[i]; 1976 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 1977 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 1978 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 1979 } else { /* block ordering does not match so we can only insert one block at a time. */ 1980 PetscInt j; 1981 for (j = 0; j < ncols; j++) { 1982 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 1983 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 1984 } 1985 } 1986 } 1987 1988 if (!V) PetscCall(PetscFree(values)); 1989 nooffprocentries = B->nooffprocentries; 1990 B->nooffprocentries = PETSC_TRUE; 1991 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1992 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1993 B->nooffprocentries = nooffprocentries; 1994 1995 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 1996 PetscFunctionReturn(0); 1997 } 1998 1999 /*MC 2000 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2001 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2002 the matrix is stored. 2003 2004 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric, you 2005 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2006 2007 Options Database Key: 2008 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2009 2010 Note: 2011 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2012 diagonal portion of the matrix of each process has to be less than or equal to the number of columns. 2013 2014 Level: beginner 2015 2016 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2017 M*/ 2018 2019 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) { 2020 Mat_MPISBAIJ *b; 2021 PetscBool flg = PETSC_FALSE; 2022 2023 PetscFunctionBegin; 2024 PetscCall(PetscNewLog(B, &b)); 2025 B->data = (void *)b; 2026 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2027 2028 B->ops->destroy = MatDestroy_MPISBAIJ; 2029 B->ops->view = MatView_MPISBAIJ; 2030 B->assembled = PETSC_FALSE; 2031 B->insertmode = NOT_SET_VALUES; 2032 2033 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2034 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2035 2036 /* build local table of row and column ownerships */ 2037 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2038 2039 /* build cache for off array entries formed */ 2040 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2041 2042 b->donotstash = PETSC_FALSE; 2043 b->colmap = NULL; 2044 b->garray = NULL; 2045 b->roworiented = PETSC_TRUE; 2046 2047 /* stuff used in block assembly */ 2048 b->barray = NULL; 2049 2050 /* stuff used for matrix vector multiply */ 2051 b->lvec = NULL; 2052 b->Mvctx = NULL; 2053 b->slvec0 = NULL; 2054 b->slvec0b = NULL; 2055 b->slvec1 = NULL; 2056 b->slvec1a = NULL; 2057 b->slvec1b = NULL; 2058 b->sMvctx = NULL; 2059 2060 /* stuff for MatGetRow() */ 2061 b->rowindices = NULL; 2062 b->rowvalues = NULL; 2063 b->getrowactive = PETSC_FALSE; 2064 2065 /* hash table stuff */ 2066 b->ht = NULL; 2067 b->hd = NULL; 2068 b->ht_size = 0; 2069 b->ht_flag = PETSC_FALSE; 2070 b->ht_fact = 0; 2071 b->ht_total_ct = 0; 2072 b->ht_insert_ct = 0; 2073 2074 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2075 b->ijonly = PETSC_FALSE; 2076 2077 b->in_loc = NULL; 2078 b->v_loc = NULL; 2079 b->n_loc = 0; 2080 2081 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2082 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2083
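/* register the type-specific preallocation and conversion routines: MatMPISBAIJSetPreallocation(), MatMPISBAIJSetPreallocationCSR(), and MatConvert() dispatch to the composed functions below */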
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2084 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2085 #if defined(PETSC_HAVE_ELEMENTAL) 2086 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2087 #endif 2088 #if defined(PETSC_HAVE_SCALAPACK) 2089 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2090 #endif 2091 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2092 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2093 2094 B->symmetric = PETSC_BOOL3_TRUE; 2095 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2096 B->symmetry_eternal = PETSC_TRUE; 2097 B->structural_symmetry_eternal = PETSC_TRUE; 2098 #if defined(PETSC_USE_COMPLEX) 2099 B->hermitian = PETSC_BOOL3_FALSE; 2100 #else 2101 B->hermitian = PETSC_BOOL3_TRUE; 2102 #endif 2103 2104 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2105 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2106 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2107 if (flg) { 2108 PetscReal fact = 1.39; 2109 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2110 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2111 if (fact <= 1.0) fact = 1.39; 2112 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2113 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2114 } 2115 PetscOptionsEnd(); 2116 PetscFunctionReturn(0); 2117 } 2118 2119 /*MC 2120 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2121 2122 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2123 and `MATMPISBAIJ` otherwise. 2124 2125 Options Database Key: 2126 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to `MatSetFromOptions()` 2127 2128 Level: beginner 2129 2130 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()` 2131 M*/ 2132 2133 /*@C 2134 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2135 the user should preallocate the matrix storage by setting the parameters 2136 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2137 performance can be increased by more than a factor of 50. 2138 2139 Collective on B 2140 2141 Input Parameters: 2142 + B - the matrix 2143 . bs - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row 2144 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2145 . d_nz - number of block nonzeros per block row in diagonal portion of local 2146 submatrix (same for all local rows) 2147 . d_nnz - array containing the number of block nonzeros in the various block rows 2148 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2149 (possibly different for each block row), or NULL.
If you plan to factor the matrix, you must leave room 2150 for the diagonal entry and set a value even if it is zero. 2151 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2152 submatrix (same for all local rows). 2153 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2154 off-diagonal portion of the local submatrix that is right of the diagonal 2155 (possibly different for each block row) or NULL. 2156 2157 Options Database Keys: 2158 + -mat_no_unroll - uses code that does not unroll the loops in the 2159 block calculations (much slower) 2160 - -mat_block_size - size of the blocks to use 2161 2162 Notes: 2163 2164 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2165 then it must be used on all processors that share the object for that argument. 2166 2167 If the *_nnz parameter is given then the *_nz parameter is ignored. 2168 2169 Storage Information: 2170 For a square global matrix we define each processor's diagonal portion 2171 to be its local rows and the corresponding columns (a square submatrix); 2172 each processor's off-diagonal portion encompasses the remainder of the 2173 local matrix (a rectangular submatrix). 2174 2175 The user can specify preallocated storage for the diagonal part of 2176 the local submatrix with either d_nz or d_nnz (not both). Set 2177 d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2178 memory allocation. Likewise, specify preallocated storage for the 2179 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2180 2181 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2182 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 2183 You can also run with the option -info and look for messages with the string 2184 malloc in them to see if additional memory allocation was needed. 2185 2186 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2187 the figure below we depict these three local rows and all columns (0-11). 2188 2189 .vb 2190 0 1 2 3 4 5 6 7 8 9 10 11 2191 -------------------------- 2192 row 3 |. . . d d d o o o o o o 2193 row 4 |. . . d d d o o o o o o 2194 row 5 |. . . d d d o o o o o o 2195 -------------------------- 2196 .ve 2197 2198 Thus, any entries in the d locations are stored in the d (diagonal) 2199 submatrix, and any entries in the o locations are stored in the 2200 o (off-diagonal) submatrix. Note that the d matrix is stored in 2201 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2202 2203 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2204 plus the diagonal part of the d matrix, 2205 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2206 2207 In general, for PDE problems in which most nonzeros are near the diagonal, 2208 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2209 or you will get TERRIBLE performance; see the users' manual chapter on 2210 matrices.
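   Example Usage (a minimal sketch, not taken from this file; here m denotes the number of local rows, which for this type must equal the number of local columns, and the block size 2 with 3 diagonal and 1 off-diagonal block nonzeros per block row are purely illustrative values):
.vb
   Mat A;
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatSetFromOptions(A));
   PetscCall(MatMPISBAIJSetPreallocation(A, 2, 3, NULL, 1, NULL));
.ve
   After preallocation, insert only the upper triangular (block) entries with MatSetValues()/MatSetValuesBlocked() and assemble with MatAssemblyBegin()/MatAssemblyEnd().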
2211 2212 Level: intermediate 2213 2214 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2215 @*/ 2216 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) { 2217 PetscFunctionBegin; 2218 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2219 PetscValidType(B, 1); 2220 PetscValidLogicalCollectiveInt(B, bs, 2); 2221 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 /*@C 2226 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2227 (block compressed row). For good matrix assembly performance 2228 the user should preallocate the matrix storage by setting the parameters 2229 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2230 performance can be increased by more than a factor of 50. 2231 2232 Collective 2233 2234 Input Parameters: 2235 + comm - MPI communicator 2236 . bs - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row 2237 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2238 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2239 This value should be the same as the local size used in creating the 2240 y vector for the matrix-vector product y = Ax. 2241 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2242 This value should be the same as the local size used in creating the 2243 x vector for the matrix-vector product y = Ax. 2244 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2245 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2246 . d_nz - number of block nonzeros per block row in diagonal portion of local 2247 submatrix (same for all local rows) 2248 . d_nnz - array containing the number of block nonzeros in the various block rows 2249 in the upper triangular portion of the diagonal portion of the local submatrix 2250 (possibly different for each block row), or NULL. 2251 If you plan to factor the matrix, you must leave room for the diagonal entry and 2252 set its value even if it is zero. 2253 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2254 submatrix (same for all local rows). 2255 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2256 off-diagonal portion of the local submatrix (possibly different for 2257 each block row) or NULL. 2258 2259 Output Parameter: 2260 . A - the matrix 2261 2262 Options Database Keys: 2263 + -mat_no_unroll - uses code that does not unroll the loops in the 2264 block calculations (much slower) 2265 . -mat_block_size - size of the blocks to use 2266 - -mat_mpi - use the parallel matrix data structures even on one processor 2267 (defaults to using SeqSBAIJ format on one processor) 2268 2269 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2270 MatXXXXSetPreallocation() paradigm instead of this routine directly.
[MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2272 2273 Notes: 2274 The number of rows and columns must be divisible by blocksize. 2275 This matrix type does not support complex Hermitian operations. 2276 2277 The user MUST specify either the local or global matrix dimensions 2278 (possibly both). 2279 2280 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2281 then it must be used on all processors that share the object for that argument. 2282 2283 If the *_nnz parameter is given then the *_nz parameter is ignored. 2284 2285 Storage Information: 2286 For a square global matrix we define each processor's diagonal portion 2287 to be its local rows and the corresponding columns (a square submatrix); 2288 each processor's off-diagonal portion encompasses the remainder of the 2289 local matrix (a rectangular submatrix). 2290 2291 The user can specify preallocated storage for the diagonal part of 2292 the local submatrix with either d_nz or d_nnz (not both). Set 2293 d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2294 memory allocation. Likewise, specify preallocated storage for the 2295 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2296 2297 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2298 the figure below we depict these three local rows and all columns (0-11). 2299 2300 .vb 2301 0 1 2 3 4 5 6 7 8 9 10 11 2302 -------------------------- 2303 row 3 |. . . d d d o o o o o o 2304 row 4 |. . . d d d o o o o o o 2305 row 5 |. . . d d d o o o o o o 2306 -------------------------- 2307 .ve 2308 2309 Thus, any entries in the d locations are stored in the d (diagonal) 2310 submatrix, and any entries in the o locations are stored in the 2311 o (off-diagonal) submatrix. Note that the d matrix is stored in 2312 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2313 2314 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2315 plus the diagonal part of the d matrix, 2316 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2317 In general, for PDE problems in which most nonzeros are near the diagonal, 2318 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2319 or you will get TERRIBLE performance; see the users' manual chapter on 2320 matrices.
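   Example Usage (a minimal sketch, not taken from this file; the block size 2, local size m, and the per-block-row counts 3 and 1 are illustrative only):
.vb
   Mat A;
   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, m, m, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 1, NULL, &A));
.ve
   Only the upper triangular blocks are then inserted with MatSetValuesBlocked() before MatAssemblyBegin()/MatAssemblyEnd().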
2321 2322 Level: intermediate 2323 2324 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2325 @*/ 2326 2327 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 2328 PetscMPIInt size; 2329 2330 PetscFunctionBegin; 2331 PetscCall(MatCreate(comm, A)); 2332 PetscCall(MatSetSizes(*A, m, n, M, N)); 2333 PetscCallMPI(MPI_Comm_size(comm, &size)); 2334 if (size > 1) { 2335 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2336 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2337 } else { 2338 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2339 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2340 } 2341 PetscFunctionReturn(0); 2342 } 2343 2344 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) { 2345 Mat mat; 2346 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2347 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2348 PetscScalar *array; 2349 2350 PetscFunctionBegin; 2351 *newmat = NULL; 2352 2353 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2354 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2355 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2356 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2357 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2358 2359 mat->factortype = matin->factortype; 2360 mat->preallocated = PETSC_TRUE; 2361 mat->assembled = PETSC_TRUE; 2362 mat->insertmode = NOT_SET_VALUES; 2363 2364 a = (Mat_MPISBAIJ *)mat->data; 2365 a->bs2 = oldmat->bs2; 2366 a->mbs = oldmat->mbs; 2367 a->nbs = oldmat->nbs; 2368 a->Mbs = oldmat->Mbs; 2369 a->Nbs = oldmat->Nbs; 2370 2371 a->size = oldmat->size; 2372 a->rank = oldmat->rank; 2373 a->donotstash = oldmat->donotstash; 2374 a->roworiented = oldmat->roworiented; 2375 a->rowindices = NULL; 2376 a->rowvalues = NULL; 2377 a->getrowactive = PETSC_FALSE; 2378 a->barray = NULL; 2379 a->rstartbs = oldmat->rstartbs; 2380 a->rendbs = oldmat->rendbs; 2381 a->cstartbs = oldmat->cstartbs; 2382 a->cendbs = oldmat->cendbs; 2383 2384 /* hash table stuff */ 2385 a->ht = NULL; 2386 a->hd = NULL; 2387 a->ht_size = 0; 2388 a->ht_flag = oldmat->ht_flag; 2389 a->ht_fact = oldmat->ht_fact; 2390 a->ht_total_ct = 0; 2391 a->ht_insert_ct = 0; 2392 2393 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2394 if (oldmat->colmap) { 2395 #if defined(PETSC_USE_CTABLE) 2396 PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap)); 2397 #else 2398 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2399 PetscCall(PetscLogObjectMemory((PetscObject)mat, (a->Nbs) * sizeof(PetscInt))); 2400 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2401 #endif 2402 } else a->colmap = NULL; 2403 2404 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 2405 PetscCall(PetscMalloc1(len, &a->garray)); 2406 PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt))); 2407 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2408 } else a->garray = NULL; 2409 2410 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2411 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2412 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec)); 2413 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2414 
PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx)); 2415 2416 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2417 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec0)); 2418 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2419 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1)); 2420 2421 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2422 PetscCall(VecGetArray(a->slvec1, &array)); 2423 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2424 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2425 PetscCall(VecRestoreArray(a->slvec1, &array)); 2426 PetscCall(VecGetArray(a->slvec0, &array)); 2427 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2428 PetscCall(VecRestoreArray(a->slvec0, &array)); 2429 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec0)); 2430 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1)); 2431 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec0b)); 2432 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1a)); 2433 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->slvec1b)); 2434 2435 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2436 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2437 a->sMvctx = oldmat->sMvctx; 2438 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->sMvctx)); 2439 2440 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2441 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 2442 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2443 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B)); 2444 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2445 *newmat = mat; 2446 PetscFunctionReturn(0); 2447 } 2448 2449 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2450 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2451 2452 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) { 2453 PetscBool isbinary; 2454 2455 PetscFunctionBegin; 2456 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2457 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2458 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2459 PetscFunctionReturn(0); 2460 } 2461 2462 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) { 2463 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2464 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2465 PetscReal atmp; 2466 PetscReal *work, *svalues, *rvalues; 2467 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2468 PetscMPIInt rank, size; 2469 PetscInt *rowners_bs, dest, count, source; 2470 PetscScalar *va; 2471 MatScalar *ba; 2472 MPI_Status stat; 2473 2474 PetscFunctionBegin; 2475 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2476 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2477 PetscCall(VecGetArray(v, &va)); 2478 2479 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2480 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2481 2482 bs = A->rmap->bs; 2483 mbs = a->mbs; 2484 
Mbs = a->Mbs; 2485 ba = b->a; 2486 bi = b->i; 2487 bj = b->j; 2488 2489 /* find ownerships */ 2490 rowners_bs = A->rmap->range; 2491 2492 /* each proc creates an array to be distributed */ 2493 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2494 2495 /* row_max for B */ 2496 if (rank != size - 1) { 2497 for (i = 0; i < mbs; i++) { 2498 ncols = bi[1] - bi[0]; 2499 bi++; 2500 brow = bs * i; 2501 for (j = 0; j < ncols; j++) { 2502 bcol = bs * (*bj); 2503 for (kcol = 0; kcol < bs; kcol++) { 2504 col = bcol + kcol; /* local col index */ 2505 col += rowners_bs[rank + 1]; /* global col index */ 2506 for (krow = 0; krow < bs; krow++) { 2507 atmp = PetscAbsScalar(*ba); 2508 ba++; 2509 row = brow + krow; /* local row index */ 2510 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2511 if (work[col] < atmp) work[col] = atmp; 2512 } 2513 } 2514 bj++; 2515 } 2516 } 2517 2518 /* send values to its owners */ 2519 for (dest = rank + 1; dest < size; dest++) { 2520 svalues = work + rowners_bs[dest]; 2521 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2522 PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2523 } 2524 } 2525 2526 /* receive values */ 2527 if (rank) { 2528 rvalues = work; 2529 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2530 for (source = 0; source < rank; source++) { 2531 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2532 /* process values */ 2533 for (i = 0; i < count; i++) { 2534 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2535 } 2536 } 2537 } 2538 2539 PetscCall(VecRestoreArray(v, &va)); 2540 PetscCall(PetscFree(work)); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 2545 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2546 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2547 PetscScalar *x, *ptr, *from; 2548 Vec bb1; 2549 const PetscScalar *b; 2550 2551 PetscFunctionBegin; 2552 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2553 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2554 2555 if (flag == SOR_APPLY_UPPER) { 2556 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2561 if (flag & SOR_ZERO_INITIAL_GUESS) { 2562 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2563 its--; 2564 } 2565 2566 PetscCall(VecDuplicate(bb, &bb1)); 2567 while (its--) { 2568 /* lower triangular part: slvec0b = - B^T*xx */ 2569 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2570 2571 /* copy xx into slvec0a */ 2572 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2573 PetscCall(VecGetArray(xx, &x)); 2574 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2575 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2576 2577 PetscCall(VecScale(mat->slvec0, -1.0)); 2578 2579 /* copy bb into slvec1a */ 2580 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2581 PetscCall(VecGetArrayRead(bb, &b)); 2582 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2583 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2584 2585 /* set slvec1b = 0 */ 2586 PetscCall(VecSet(mat->slvec1b, 0.0)); 2587 2588 
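/* scatter-add the negated slvec0 = -[xx; B^T*xx] into slvec1 (initialized to [bb; 0]); the communication started here is completed by VecScatterEnd() below, after the array accesses are released */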
PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2589 PetscCall(VecRestoreArray(xx, &x)); 2590 PetscCall(VecRestoreArrayRead(bb, &b)); 2591 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2592 2593 /* upper triangular part: bb1 = bb1 - B*x */ 2594 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2595 2596 /* local diagonal sweep */ 2597 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2598 } 2599 PetscCall(VecDestroy(&bb1)); 2600 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2601 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2602 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2603 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2604 } else if (flag & SOR_EISENSTAT) { 2605 Vec xx1; 2606 PetscBool hasop; 2607 const PetscScalar *diag; 2608 PetscScalar *sl, scale = (omega - 2.0) / omega; 2609 PetscInt i, n; 2610 2611 if (!mat->xx1) { 2612 PetscCall(VecDuplicate(bb, &mat->xx1)); 2613 PetscCall(VecDuplicate(bb, &mat->bb1)); 2614 } 2615 xx1 = mat->xx1; 2616 bb1 = mat->bb1; 2617 2618 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2619 2620 if (!mat->diag) { 2621 /* this is wrong for same matrix with new nonzero values */ 2622 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2623 PetscCall(MatGetDiagonal(matin, mat->diag)); 2624 } 2625 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2626 2627 if (hasop) { 2628 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2629 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2630 } else { 2631 /* 2632 These two lines are replaced by code that may be a bit faster for a good compiler 2633 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2634 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2635 */ 2636 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2637 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2638 PetscCall(VecGetArrayRead(bb, &b)); 2639 PetscCall(VecGetArray(xx, &x)); 2640 PetscCall(VecGetLocalSize(xx, &n)); 2641 if (omega == 1.0) { 2642 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2643 PetscCall(PetscLogFlops(2.0 * n)); 2644 } else { 2645 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2646 PetscCall(PetscLogFlops(3.0 * n)); 2647 } 2648 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2649 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2650 PetscCall(VecRestoreArrayRead(bb, &b)); 2651 PetscCall(VecRestoreArray(xx, &x)); 2652 } 2653 2654 /* multiply off-diagonal portion of matrix */ 2655 PetscCall(VecSet(mat->slvec1b, 0.0)); 2656 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2657 PetscCall(VecGetArray(mat->slvec0, &from)); 2658 PetscCall(VecGetArray(xx, &x)); 2659 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2660 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2661 PetscCall(VecRestoreArray(xx, &x)); 2662 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2663 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2664 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2665 2666 /* local sweep */ 2667 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, 
(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2668 PetscCall(VecAXPY(xx, 1.0, xx1)); 2669 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2670 PetscFunctionReturn(0); 2671 } 2672 2673 /*@ 2674 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2675 CSR format the local rows. 2676 2677 Collective 2678 2679 Input Parameters: 2680 + comm - MPI communicator 2681 . bs - the block size, only a block size of 1 is supported 2682 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2683 . n - This value should be the same as the local size used in creating the 2684 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2685 calculated if N is given) For square matrices n is almost always m. 2686 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2687 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2688 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2689 . j - column indices 2690 - a - matrix values 2691 2692 Output Parameter: 2693 . mat - the matrix 2694 2695 Level: intermediate 2696 2697 Notes: 2698 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2699 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2700 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2701 2702 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 2703 2704 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2705 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2706 @*/ 2707 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) { 2708 PetscFunctionBegin; 2709 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2710 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2711 PetscCall(MatCreate(comm, mat)); 2712 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2713 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2714 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 /*@C 2719 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2720 2721 Collective 2722 2723 Input Parameters: 2724 + B - the matrix 2725 . bs - the block size 2726 . i - the indices into j for the start of each local row (starts with zero) 2727 . 
j - the column indices for each local row (starts with zero); these must be sorted for each row 2728 - v - optional values in the matrix 2729 2730 Level: advanced 2731 2732 Notes: 2733 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries 2734 and usually the numerical values as well. 2735 2736 Any entries below the diagonal are ignored. 2737 2738 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2739 @*/ 2740 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 2741 PetscFunctionBegin; 2742 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2743 PetscFunctionReturn(0); 2744 } 2745 2746 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 2747 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2748 PetscInt *indx; 2749 PetscScalar *values; 2750 2751 PetscFunctionBegin; 2752 PetscCall(MatGetSize(inmat, &m, &N)); 2753 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2754 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2755 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2756 PetscInt *bindx, rmax = a->rmax, j; 2757 PetscMPIInt rank, size; 2758 2759 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2760 mbs = m / bs; 2761 Nbs = N / cbs; 2762 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2763 nbs = n / cbs; 2764 2765 PetscCall(PetscMalloc1(rmax, &bindx)); 2766 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2767 2768 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2769 PetscCallMPI(MPI_Comm_size(comm, &size)); 2770 if (rank == size - 1) { 2771 /* Check sum(nbs) = Nbs */ 2772 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2773 } 2774 2775 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2776 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2777 for (i = 0; i < mbs; i++) { 2778 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2779 nnz = nnz / bs; 2780 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2781 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2782 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2783 } 2784 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2785 PetscCall(PetscFree(bindx)); 2786 2787 PetscCall(MatCreate(comm, outmat)); 2788 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2789 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2790 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2791 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2792 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2793 MatPreallocateEnd(dnz, onz); 2794 } 2795 2796 /* numeric phase */ 2797 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2798 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2799 2800 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2801 for (i = 0; i < m; i++) { 2802 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2803 Ii = i + rstart; 2804
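/* insert row i of the sequential matrix (upper triangular part only) as global row Ii of the concatenated parallel matrix */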
PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2805 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2806 } 2807 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2808 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2809 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2810 PetscFunctionReturn(0); 2811 } 2812