1 2 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 4 #include <../src/mat/impls/sbaij/seq/sbaij.h> 5 #include <petscblaslapack.h> 6 7 #if defined(PETSC_HAVE_ELEMENTAL) 8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 9 #endif 10 #if defined(PETSC_HAVE_SCALAPACK) 11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 12 #endif 13 14 /* This could be moved to matimpl.h */ 15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 16 { 17 Mat preallocator; 18 PetscInt r, rstart, rend; 19 PetscInt bs, i, m, n, M, N; 20 PetscBool cong = PETSC_TRUE; 21 22 PetscFunctionBegin; 23 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 24 PetscValidLogicalCollectiveInt(B, nm, 2); 25 for (i = 0; i < nm; i++) { 26 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 27 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 28 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 29 } 30 PetscValidLogicalCollectiveBool(B, fill, 5); 31 PetscCall(MatGetBlockSize(B, &bs)); 32 PetscCall(MatGetSize(B, &M, &N)); 33 PetscCall(MatGetLocalSize(B, &m, &n)); 34 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 35 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 36 PetscCall(MatSetBlockSize(preallocator, bs)); 37 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 38 PetscCall(MatSetUp(preallocator)); 39 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 40 for (r = rstart; r < rend; ++r) { 41 PetscInt ncols; 42 const PetscInt *row; 43 const PetscScalar *vals; 44 45 for (i = 0; i < nm; i++) { 46 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 47 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 48 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 49 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 50 } 51 } 52 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 53 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 54 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 55 PetscCall(MatDestroy(&preallocator)); 56 PetscFunctionReturn(0); 57 } 58 59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 60 { 61 Mat B; 62 PetscInt r; 63 64 PetscFunctionBegin; 65 if (reuse != MAT_REUSE_MATRIX) { 66 PetscBool symm = PETSC_TRUE, isdense; 67 PetscInt bs; 68 69 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 70 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 71 PetscCall(MatSetType(B, newtype)); 72 PetscCall(MatGetBlockSize(A, &bs)); 73 PetscCall(MatSetBlockSize(B, bs)); 74 PetscCall(PetscLayoutSetUp(B->rmap)); 75 PetscCall(PetscLayoutSetUp(B->cmap)); 76 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 77 if (!isdense) { 78 PetscCall(MatGetRowUpperTriangular(A)); 79 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 80 PetscCall(MatRestoreRowUpperTriangular(A)); 81 } else { 82 PetscCall(MatSetUp(B)); 83 } 84 } else { 85 B = *newmat; 86 PetscCall(MatZeroEntries(B)); 87 } 88 89 PetscCall(MatGetRowUpperTriangular(A)); 90 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 91 PetscInt ncols; 92 const PetscInt *row; 93 const PetscScalar *vals; 94 95 
PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 96 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 97 #if defined(PETSC_USE_COMPLEX) 98 if (A->hermitian == PETSC_BOOL3_TRUE) { 99 PetscInt i; 100 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 101 } else { 102 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 103 } 104 #else 105 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 106 #endif 107 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 108 } 109 PetscCall(MatRestoreRowUpperTriangular(A)); 110 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 111 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 112 113 if (reuse == MAT_INPLACE_MATRIX) { 114 PetscCall(MatHeaderReplace(A, &B)); 115 } else { 116 *newmat = B; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 122 { 123 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 124 125 PetscFunctionBegin; 126 PetscCall(MatStoreValues(aij->A)); 127 PetscCall(MatStoreValues(aij->B)); 128 PetscFunctionReturn(0); 129 } 130 131 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 132 { 133 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 134 135 PetscFunctionBegin; 136 PetscCall(MatRetrieveValues(aij->A)); 137 PetscCall(MatRetrieveValues(aij->B)); 138 PetscFunctionReturn(0); 139 } 140 141 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 142 { \ 143 brow = row / bs; \ 144 rp = aj + ai[brow]; \ 145 ap = aa + bs2 * ai[brow]; \ 146 rmax = aimax[brow]; \ 147 nrow = ailen[brow]; \ 148 bcol = col / bs; \ 149 ridx = row % bs; \ 150 cidx = col % bs; \ 151 low = 0; \ 152 high = nrow; \ 153 while (high - low > 3) { \ 154 t = (low + high) / 2; \ 155 if (rp[t] > bcol) high = t; \ 156 else low = t; \ 157 } \ 158 for (_i = low; _i < high; _i++) { \ 159 if (rp[_i] > bcol) break; \ 160 if (rp[_i] == bcol) { \ 161 bap = ap + bs2 * _i + bs * cidx + ridx; \ 162 if (addv == ADD_VALUES) *bap += value; \ 163 else *bap = value; \ 164 goto a_noinsert; \ 165 } \ 166 } \ 167 if (a->nonew == 1) goto a_noinsert; \ 168 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 169 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 170 N = nrow++ - 1; \ 171 /* shift up all the later entries in this row */ \ 172 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 173 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 174 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 175 rp[_i] = bcol; \ 176 ap[bs2 * _i + bs * cidx + ridx] = value; \ 177 A->nonzerostate++; \ 178 a_noinsert:; \ 179 ailen[brow] = nrow; \ 180 } 181 182 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 183 { \ 184 brow = row / bs; \ 185 rp = bj + bi[brow]; \ 186 ap = ba + bs2 * bi[brow]; \ 187 rmax = bimax[brow]; \ 188 nrow = bilen[brow]; \ 189 bcol = col / bs; \ 190 ridx = row % bs; \ 191 cidx = col % bs; \ 192 low = 0; \ 193 high = nrow; \ 194 while (high - low > 3) { \ 195 t = (low + high) / 2; \ 196 if (rp[t] > bcol) high = t; \ 197 else low = t; \ 198 } \ 199 for (_i = low; _i < high; _i++) { \ 200 if (rp[_i] > bcol) break; \ 201 if (rp[_i] == bcol) { \ 202 bap = ap + bs2 * _i + bs * cidx + ridx; \ 203 if (addv == ADD_VALUES) *bap += value; \ 204 else *bap = value; \ 205 goto 
b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar value;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macro */
  Mat A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar *aa = a->a;

  Mat B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar *ba = b->a;

  PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 279 } 280 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 281 col = in[j] - cstart_orig; /* local col index */ 282 brow = row / bs; 283 bcol = col / bs; 284 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 285 if (roworiented) value = v[i * n + j]; 286 else value = v[i + j * m]; 287 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 288 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 289 } else if (in[j] < 0) { 290 continue; 291 } else { 292 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 293 /* off-diag entry (B) */ 294 if (mat->was_assembled) { 295 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 296 #if defined(PETSC_USE_CTABLE) 297 PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col)); 298 col = col - 1; 299 #else 300 col = baij->colmap[in[j] / bs] - 1; 301 #endif 302 if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 303 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 304 col = in[j]; 305 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 306 B = baij->B; 307 b = (Mat_SeqBAIJ *)(B)->data; 308 bimax = b->imax; 309 bi = b->i; 310 bilen = b->ilen; 311 bj = b->j; 312 ba = b->a; 313 } else col += in[j] % bs; 314 } else col = in[j]; 315 if (roworiented) value = v[i * n + j]; 316 else value = v[i + j * m]; 317 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 318 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 319 } 320 } 321 } else { /* off processor entry */ 322 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 323 if (!baij->donotstash) { 324 mat->assembled = PETSC_FALSE; 325 n_loc = 0; 326 for (j = 0; j < n; j++) { 327 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 328 in_loc[n_loc] = in[j]; 329 if (roworiented) { 330 v_loc[n_loc] = v[i * n + j]; 331 } else { 332 v_loc[n_loc] = v[j * m + i]; 333 } 334 n_loc++; 335 } 336 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 337 } 338 } 339 } 340 PetscFunctionReturn(0); 341 } 342 343 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 344 { 345 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 346 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 347 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 348 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 349 PetscBool roworiented = a->roworiented; 350 const PetscScalar *value = v; 351 MatScalar *ap, *aa = a->a, *bap; 352 353 PetscFunctionBegin; 354 if (col < row) { 355 if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */ 356 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 357 } 358 rp = aj + ai[row]; 359 ap = aa + bs2 * ai[row]; 360 rmax = imax[row]; 361 nrow = ailen[row]; 362 value = v; 363 low = 0; 364 high = nrow; 365 366 while (high - low > 7) { 367 t = (low + high) / 2; 368 if (rp[t] > col) high = t; 369 else low = t; 370 } 371 for (i = low; i < high; i++) { 372 if (rp[i] > col) break; 373 if (rp[i] == col) { 374 bap = ap + bs2 * i; 375 if (roworiented) { 376 if (is == ADD_VALUES) { 377 for (ii = 0; ii < bs; ii++) { 378 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 379 } 380 } else { 381 for (ii = 0; ii < bs; ii++) { 382 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 383 } 384 } 385 } else { 386 if (is == ADD_VALUES) { 387 for (ii = 0; ii < bs; ii++) { 388 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 389 } 390 } else { 391 for (ii = 0; ii < bs; ii++) { 392 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 393 } 394 } 395 } 396 goto noinsert2; 397 } 398 } 399 if (nonew == 1) goto noinsert2; 400 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 401 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 402 N = nrow++ - 1; 403 high++; 404 /* shift up all the later entries in this row */ 405 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 406 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 407 rp[i] = col; 408 bap = ap + bs2 * i; 409 if (roworiented) { 410 for (ii = 0; ii < bs; ii++) { 411 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 412 } 413 } else { 414 for (ii = 0; ii < bs; ii++) { 415 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 416 } 417 } 418 noinsert2:; 419 ailen[row] = nrow; 420 PetscFunctionReturn(0); 421 } 422 423 /* 424 This routine is exactly duplicated in mpibaij.c 425 */ 426 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 427 { 428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 429 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 430 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 431 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 432 PetscBool roworiented = a->roworiented; 433 const PetscScalar *value = v; 434 MatScalar *ap, *aa = a->a, *bap; 435 436 PetscFunctionBegin; 437 rp = aj + ai[row]; 438 ap = aa + bs2 * ai[row]; 439 rmax = imax[row]; 440 nrow = ailen[row]; 441 low = 0; 442 high = nrow; 443 value = v; 444 while (high - low > 7) { 445 t = (low + high) / 2; 446 if (rp[t] > col) high = t; 447 else low = t; 448 } 449 for (i = low; i < high; i++) { 450 if (rp[i] > col) break; 451 if (rp[i] == col) { 452 bap = ap + bs2 * i; 453 if (roworiented) { 454 if (is == ADD_VALUES) { 455 for (ii = 0; ii < bs; ii++) { 456 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 457 } 458 } else { 459 for (ii = 0; ii < bs; ii++) { 460 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 461 } 462 } 463 } else { 464 if (is == ADD_VALUES) { 465 for (ii = 0; ii < bs; ii++, value += bs) { 466 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 467 bap += bs; 468 } 469 } else { 470 for (ii = 0; ii < bs; ii++, value += bs) { 471 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 472 bap += bs; 473 } 474 } 475 } 476 goto noinsert2; 477 } 478 } 
479 if (nonew == 1) goto noinsert2; 480 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 481 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 482 N = nrow++ - 1; 483 high++; 484 /* shift up all the later entries in this row */ 485 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 486 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 487 rp[i] = col; 488 bap = ap + bs2 * i; 489 if (roworiented) { 490 for (ii = 0; ii < bs; ii++) { 491 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 492 } 493 } else { 494 for (ii = 0; ii < bs; ii++) { 495 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 496 } 497 } 498 noinsert2:; 499 ailen[row] = nrow; 500 PetscFunctionReturn(0); 501 } 502 503 /* 504 This routine could be optimized by removing the need for the block copy below and passing stride information 505 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 506 */ 507 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 508 { 509 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 510 const MatScalar *value; 511 MatScalar *barray = baij->barray; 512 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 513 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 514 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 515 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 516 517 PetscFunctionBegin; 518 if (!barray) { 519 PetscCall(PetscMalloc1(bs2, &barray)); 520 baij->barray = barray; 521 } 522 523 if (roworiented) { 524 stepval = (n - 1) * bs; 525 } else { 526 stepval = (m - 1) * bs; 527 } 528 for (i = 0; i < m; i++) { 529 if (im[i] < 0) continue; 530 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 531 if (im[i] >= rstart && im[i] < rend) { 532 row = im[i] - rstart; 533 for (j = 0; j < n; j++) { 534 if (im[i] > in[j]) { 535 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 536 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 537 } 538 /* If NumCol = 1 then a copy is not required */ 539 if ((roworiented) && (n == 1)) { 540 barray = (MatScalar *)v + i * bs2; 541 } else if ((!roworiented) && (m == 1)) { 542 barray = (MatScalar *)v + j * bs2; 543 } else { /* Here a copy is required */ 544 if (roworiented) { 545 value = v + i * (stepval + bs) * bs + j * bs; 546 } else { 547 value = v + j * (stepval + bs) * bs + i * bs; 548 } 549 for (ii = 0; ii < bs; ii++, value += stepval) { 550 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 551 } 552 barray -= bs2; 553 } 554 555 if (in[j] >= cstart && in[j] < cend) { 556 col = in[j] - cstart; 557 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 558 } else if (in[j] < 0) { 559 continue; 560 } else { 561 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 562 if (mat->was_assembled) { 563 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 564 565 #if defined(PETSC_USE_DEBUG) 566 #if defined(PETSC_USE_CTABLE) 567 { 568 PetscInt data; 569 PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data)); 570 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 571 } 572 #else 573 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 574 #endif 575 #endif 576 #if defined(PETSC_USE_CTABLE) 577 PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col)); 578 col = (col - 1) / bs; 579 #else 580 col = (baij->colmap[in[j]] - 1) / bs; 581 #endif 582 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 583 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 584 col = in[j]; 585 } 586 } else col = in[j]; 587 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 588 } 589 } 590 } else { 591 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 592 if (!baij->donotstash) { 593 if (roworiented) { 594 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 595 } else { 596 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 597 } 598 } 599 } 600 } 601 PetscFunctionReturn(0); 602 } 603 604 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 605 { 606 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 607 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 608 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 609 610 PetscFunctionBegin; 611 for (i = 0; i < m; i++) { 612 if (idxm[i] < 0) continue; /* negative row */ 613 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 614 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 615 row = idxm[i] - bsrstart; 616 for (j = 0; j < n; j++) { 617 if (idxn[j] < 0) continue; /* negative column */ 618 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 619 if (idxn[j] >= 
bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal *rsum, *rsum2, vabs;
      PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar *v;

      PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
      PetscCall(PetscArrayzero(rsum, mat->cmap->N));
      /* Amat */
      v = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum, rsum2));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

PetscErrorCode
MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscMPIInt n;
  PetscBool r1, r2, r3;
  MatScalar *val;

  /* do not use 'b = (Mat_SeqBAIJ *)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
*/ 811 /* 812 if nonzero structure of submatrix B cannot change then we know that 813 no processor disassembled thus we can skip this stuff 814 */ 815 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 816 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 817 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 818 } 819 820 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 821 PetscCall(MatAssemblyBegin(baij->B, mode)); 822 PetscCall(MatAssemblyEnd(baij->B, mode)); 823 824 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 825 826 baij->rowvalues = NULL; 827 828 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 829 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 830 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 831 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 837 #include <petscdraw.h> 838 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 839 { 840 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 841 PetscInt bs = mat->rmap->bs; 842 PetscMPIInt rank = baij->rank; 843 PetscBool iascii, isdraw; 844 PetscViewer sviewer; 845 PetscViewerFormat format; 846 847 PetscFunctionBegin; 848 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 849 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 850 if (iascii) { 851 PetscCall(PetscViewerGetFormat(viewer, &format)); 852 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 853 MatInfo info; 854 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 855 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 856 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 857 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 858 mat->rmap->bs, (double)info.memory)); 859 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 860 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 861 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 862 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 863 PetscCall(PetscViewerFlush(viewer)); 864 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 865 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 866 PetscCall(VecScatterView(baij->Mvctx, viewer)); 867 PetscFunctionReturn(0); 868 } else if (format == PETSC_VIEWER_ASCII_INFO) { 869 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 870 PetscFunctionReturn(0); 871 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 872 PetscFunctionReturn(0); 873 } 874 } 875 876 if (isdraw) { 877 PetscDraw draw; 878 PetscBool isnull; 879 
PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 880 PetscCall(PetscDrawIsNull(draw, &isnull)); 881 if (isnull) PetscFunctionReturn(0); 882 } 883 884 { 885 /* assemble the entire matrix onto first processor. */ 886 Mat A; 887 Mat_SeqSBAIJ *Aloc; 888 Mat_SeqBAIJ *Bloc; 889 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 890 MatScalar *a; 891 const char *matname; 892 893 /* Should this be the same type as mat? */ 894 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 895 if (rank == 0) { 896 PetscCall(MatSetSizes(A, M, N, M, N)); 897 } else { 898 PetscCall(MatSetSizes(A, 0, 0, M, N)); 899 } 900 PetscCall(MatSetType(A, MATMPISBAIJ)); 901 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 902 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 903 904 /* copy over the A part */ 905 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 906 ai = Aloc->i; 907 aj = Aloc->j; 908 a = Aloc->a; 909 PetscCall(PetscMalloc1(bs, &rvals)); 910 911 for (i = 0; i < mbs; i++) { 912 rvals[0] = bs * (baij->rstartbs + i); 913 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 914 for (j = ai[i]; j < ai[i + 1]; j++) { 915 col = (baij->cstartbs + aj[j]) * bs; 916 for (k = 0; k < bs; k++) { 917 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 918 col++; 919 a += bs; 920 } 921 } 922 } 923 /* copy over the B part */ 924 Bloc = (Mat_SeqBAIJ *)baij->B->data; 925 ai = Bloc->i; 926 aj = Bloc->j; 927 a = Bloc->a; 928 for (i = 0; i < mbs; i++) { 929 rvals[0] = bs * (baij->rstartbs + i); 930 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 931 for (j = ai[i]; j < ai[i + 1]; j++) { 932 col = baij->garray[aj[j]] * bs; 933 for (k = 0; k < bs; k++) { 934 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 935 col++; 936 a += bs; 937 } 938 } 939 } 940 PetscCall(PetscFree(rvals)); 941 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 942 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 943 /* 944 Everyone has to call to draw the matrix since the graphics waits are 945 synchronized across all processors that share the PetscDraw object 946 */ 947 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 948 PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 949 if (rank == 0) { 950 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 951 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 952 } 953 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 954 PetscCall(PetscViewerFlush(viewer)); 955 PetscCall(MatDestroy(&A)); 956 } 957 PetscFunctionReturn(0); 958 } 959 960 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 961 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 962 963 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 964 { 965 PetscBool iascii, isdraw, issocket, isbinary; 966 967 PetscFunctionBegin; 968 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 969 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 970 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 971 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 972 if (iascii || isdraw || issocket) { 973 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 974 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 975 PetscFunctionReturn(0); 976 } 977 978 
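/*
   Illustrative sketch of how a caller typically assembles a MATMPISBAIJ matrix with
   the routines implemented in this file. Only entries in upper-triangular blocks
   (block row <= block column) may be set, as enforced by MatSetValues_MPISBAIJ()
   above; lower-triangular input is either ignored or reported as an error depending
   on MAT_IGNORE_LOWER_TRIANGULAR. The sizes, preallocation counts, and the helpers
   nonzero() and val() are hypothetical placeholders, not part of PETSc.

.vb
   Mat      A;
   PetscInt i, j, rstart, rend, N = 100, bs = 2;

   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, N, N));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocation(A, bs, 5, NULL, 2, NULL));
   PetscCall(MatSetOption(A, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
   for (i = rstart; i < rend; i++) {
     for (j = i; j < N; j++) { // upper triangle only
       if (nonzero(i, j)) PetscCall(MatSetValue(A, i, j, val(i, j), INSERT_VALUES));
     }
   }
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve
*/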
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 979 { 980 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 981 982 PetscFunctionBegin; 983 #if defined(PETSC_USE_LOG) 984 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 985 #endif 986 PetscCall(MatStashDestroy_Private(&mat->stash)); 987 PetscCall(MatStashDestroy_Private(&mat->bstash)); 988 PetscCall(MatDestroy(&baij->A)); 989 PetscCall(MatDestroy(&baij->B)); 990 #if defined(PETSC_USE_CTABLE) 991 PetscCall(PetscTableDestroy(&baij->colmap)); 992 #else 993 PetscCall(PetscFree(baij->colmap)); 994 #endif 995 PetscCall(PetscFree(baij->garray)); 996 PetscCall(VecDestroy(&baij->lvec)); 997 PetscCall(VecScatterDestroy(&baij->Mvctx)); 998 PetscCall(VecDestroy(&baij->slvec0)); 999 PetscCall(VecDestroy(&baij->slvec0b)); 1000 PetscCall(VecDestroy(&baij->slvec1)); 1001 PetscCall(VecDestroy(&baij->slvec1a)); 1002 PetscCall(VecDestroy(&baij->slvec1b)); 1003 PetscCall(VecScatterDestroy(&baij->sMvctx)); 1004 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 1005 PetscCall(PetscFree(baij->barray)); 1006 PetscCall(PetscFree(baij->hd)); 1007 PetscCall(VecDestroy(&baij->diag)); 1008 PetscCall(VecDestroy(&baij->bb1)); 1009 PetscCall(VecDestroy(&baij->xx1)); 1010 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1011 PetscCall(PetscFree(baij->setvaluescopy)); 1012 #endif 1013 PetscCall(PetscFree(baij->in_loc)); 1014 PetscCall(PetscFree(baij->v_loc)); 1015 PetscCall(PetscFree(baij->rangebs)); 1016 PetscCall(PetscFree(mat->data)); 1017 1018 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 1022 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 1023 #if defined(PETSC_HAVE_ELEMENTAL) 1024 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 1025 #endif 1026 #if defined(PETSC_HAVE_SCALAPACK) 1027 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 1028 #endif 1029 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 1030 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 1031 PetscFunctionReturn(0); 1032 } 1033 1034 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1035 { 1036 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1037 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1038 PetscScalar *from; 1039 const PetscScalar *x; 1040 1041 PetscFunctionBegin; 1042 /* diagonal part */ 1043 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1044 PetscCall(VecSet(a->slvec1b, 0.0)); 1045 1046 /* subdiagonal part */ 1047 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1048 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1049 1050 /* copy x into the vec slvec0 */ 1051 PetscCall(VecGetArray(a->slvec0, &from)); 1052 PetscCall(VecGetArrayRead(xx, &x)); 1053 1054 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1055 PetscCall(VecRestoreArray(a->slvec0, &from)); 1056 PetscCall(VecRestoreArrayRead(xx, &x)); 1057 1058 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, 
SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx,
a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB;
i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1253 } 1254 } else { 1255 if (idx) *idx = NULL; 1256 if (v) *v = NULL; 1257 } 1258 } 1259 *nz = nztot; 1260 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1261 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1262 PetscFunctionReturn(0); 1263 } 1264 1265 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1266 { 1267 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1268 1269 PetscFunctionBegin; 1270 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1271 baij->getrowactive = PETSC_FALSE; 1272 PetscFunctionReturn(0); 1273 } 1274 1275 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1276 { 1277 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1278 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1279 1280 PetscFunctionBegin; 1281 aA->getrow_utriangular = PETSC_TRUE; 1282 PetscFunctionReturn(0); 1283 } 1284 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1285 { 1286 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1287 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1288 1289 PetscFunctionBegin; 1290 aA->getrow_utriangular = PETSC_FALSE; 1291 PetscFunctionReturn(0); 1292 } 1293 1294 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1295 { 1296 PetscFunctionBegin; 1297 if (PetscDefined(USE_COMPLEX)) { 1298 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1299 1300 PetscCall(MatConjugate(a->A)); 1301 PetscCall(MatConjugate(a->B)); 1302 } 1303 PetscFunctionReturn(0); 1304 } 1305 1306 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1307 { 1308 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1309 1310 PetscFunctionBegin; 1311 PetscCall(MatRealPart(a->A)); 1312 PetscCall(MatRealPart(a->B)); 1313 PetscFunctionReturn(0); 1314 } 1315 1316 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1317 { 1318 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1319 1320 PetscFunctionBegin; 1321 PetscCall(MatImaginaryPart(a->A)); 1322 PetscCall(MatImaginaryPart(a->B)); 1323 PetscFunctionReturn(0); 1324 } 1325 1326 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1327 Input: isrow - distributed(parallel), 1328 iscol_local - locally owned (seq) 1329 */ 1330 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1331 { 1332 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1333 const PetscInt *ptr1, *ptr2; 1334 1335 PetscFunctionBegin; 1336 PetscCall(ISGetLocalSize(isrow, &sz1)); 1337 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1338 if (sz1 > sz2) { 1339 *flg = PETSC_FALSE; 1340 PetscFunctionReturn(0); 1341 } 1342 1343 PetscCall(ISGetIndices(isrow, &ptr1)); 1344 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1345 1346 PetscCall(PetscMalloc1(sz1, &a1)); 1347 PetscCall(PetscMalloc1(sz2, &a2)); 1348 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1349 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1350 PetscCall(PetscSortInt(sz1, a1)); 1351 PetscCall(PetscSortInt(sz2, a2)); 1352 1353 nmatch = 0; 1354 k = 0; 1355 for (i = 0; i < sz1; i++) { 1356 for (j = k; j < sz2; j++) { 1357 if (a1[i] == a2[j]) { 1358 k = j; 1359 nmatch++; 1360 break; 1361 } 1362 } 1363 } 1364 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1365 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1366 PetscCall(PetscFree(a1)); 1367 PetscCall(PetscFree(a2)); 1368 if (nmatch < sz1) { 1369 *flg = PETSC_FALSE; 1370 } else { 1371 *flg = PETSC_TRUE; 1372 } 1373 PetscFunctionReturn(0); 1374 } 1375 1376 PetscErrorCode 
MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1377 { 1378 IS iscol_local; 1379 PetscInt csize; 1380 PetscBool isequal; 1381 1382 PetscFunctionBegin; 1383 PetscCall(ISGetLocalSize(iscol, &csize)); 1384 if (call == MAT_REUSE_MATRIX) { 1385 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1386 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1387 } else { 1388 PetscBool issorted; 1389 1390 PetscCall(ISAllGather(iscol, &iscol_local)); 1391 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1392 PetscCall(ISSorted(iscol_local, &issorted)); 1393 PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 1394 } 1395 1396 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1397 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1398 if (call == MAT_INITIAL_MATRIX) { 1399 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1400 PetscCall(ISDestroy(&iscol_local)); 1401 } 1402 PetscFunctionReturn(0); 1403 } 1404 1405 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1406 { 1407 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1408 1409 PetscFunctionBegin; 1410 PetscCall(MatZeroEntries(l->A)); 1411 PetscCall(MatZeroEntries(l->B)); 1412 PetscFunctionReturn(0); 1413 } 1414 1415 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1416 { 1417 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1418 Mat A = a->A, B = a->B; 1419 PetscLogDouble isend[5], irecv[5]; 1420 1421 PetscFunctionBegin; 1422 info->block_size = (PetscReal)matin->rmap->bs; 1423 1424 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1425 1426 isend[0] = info->nz_used; 1427 isend[1] = info->nz_allocated; 1428 isend[2] = info->nz_unneeded; 1429 isend[3] = info->memory; 1430 isend[4] = info->mallocs; 1431 1432 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1433 1434 isend[0] += info->nz_used; 1435 isend[1] += info->nz_allocated; 1436 isend[2] += info->nz_unneeded; 1437 isend[3] += info->memory; 1438 isend[4] += info->mallocs; 1439 if (flag == MAT_LOCAL) { 1440 info->nz_used = isend[0]; 1441 info->nz_allocated = isend[1]; 1442 info->nz_unneeded = isend[2]; 1443 info->memory = isend[3]; 1444 info->mallocs = isend[4]; 1445 } else if (flag == MAT_GLOBAL_MAX) { 1446 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1447 1448 info->nz_used = irecv[0]; 1449 info->nz_allocated = irecv[1]; 1450 info->nz_unneeded = irecv[2]; 1451 info->memory = irecv[3]; 1452 info->mallocs = irecv[4]; 1453 } else if (flag == MAT_GLOBAL_SUM) { 1454 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1455 1456 info->nz_used = irecv[0]; 1457 info->nz_allocated = irecv[1]; 1458 info->nz_unneeded = irecv[2]; 1459 info->memory = irecv[3]; 1460 info->mallocs = irecv[4]; 1461 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1462 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1463 info->fill_ratio_needed = 0; 1464 info->factor_mallocs = 0; 1465 PetscFunctionReturn(0); 1466 } 1467 1468 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1469 { 1470 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1471 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1472 1473 
PetscFunctionBegin; 1474 switch (op) { 1475 case MAT_NEW_NONZERO_LOCATIONS: 1476 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1477 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1478 case MAT_KEEP_NONZERO_PATTERN: 1479 case MAT_SUBMAT_SINGLEIS: 1480 case MAT_NEW_NONZERO_LOCATION_ERR: 1481 MatCheckPreallocated(A, 1); 1482 PetscCall(MatSetOption(a->A, op, flg)); 1483 PetscCall(MatSetOption(a->B, op, flg)); 1484 break; 1485 case MAT_ROW_ORIENTED: 1486 MatCheckPreallocated(A, 1); 1487 a->roworiented = flg; 1488 1489 PetscCall(MatSetOption(a->A, op, flg)); 1490 PetscCall(MatSetOption(a->B, op, flg)); 1491 break; 1492 case MAT_FORCE_DIAGONAL_ENTRIES: 1493 case MAT_SORTED_FULL: 1494 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1495 break; 1496 case MAT_IGNORE_OFF_PROC_ENTRIES: 1497 a->donotstash = flg; 1498 break; 1499 case MAT_USE_HASH_TABLE: 1500 a->ht_flag = flg; 1501 break; 1502 case MAT_HERMITIAN: 1503 MatCheckPreallocated(A, 1); 1504 PetscCall(MatSetOption(a->A, op, flg)); 1505 #if defined(PETSC_USE_COMPLEX) 1506 if (flg) { /* need different mat-vec ops */ 1507 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1508 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1509 A->ops->multtranspose = NULL; 1510 A->ops->multtransposeadd = NULL; 1511 A->symmetric = PETSC_BOOL3_FALSE; 1512 } 1513 #endif 1514 break; 1515 case MAT_SPD: 1516 case MAT_SYMMETRIC: 1517 MatCheckPreallocated(A, 1); 1518 PetscCall(MatSetOption(a->A, op, flg)); 1519 #if defined(PETSC_USE_COMPLEX) 1520 if (flg) { /* restore to use default mat-vec ops */ 1521 A->ops->mult = MatMult_MPISBAIJ; 1522 A->ops->multadd = MatMultAdd_MPISBAIJ; 1523 A->ops->multtranspose = MatMult_MPISBAIJ; 1524 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1525 } 1526 #endif 1527 break; 1528 case MAT_STRUCTURALLY_SYMMETRIC: 1529 MatCheckPreallocated(A, 1); 1530 PetscCall(MatSetOption(a->A, op, flg)); 1531 break; 1532 case MAT_SYMMETRY_ETERNAL: 1533 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1534 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1535 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1536 break; 1537 case MAT_SPD_ETERNAL: 1538 break; 1539 case MAT_IGNORE_LOWER_TRIANGULAR: 1540 aA->ignore_ltriangular = flg; 1541 break; 1542 case MAT_ERROR_LOWER_TRIANGULAR: 1543 aA->ignore_ltriangular = flg; 1544 break; 1545 case MAT_GETROW_UPPERTRIANGULAR: 1546 aA->getrow_utriangular = flg; 1547 break; 1548 default: 1549 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1550 } 1551 PetscFunctionReturn(0); 1552 } 1553 1554 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1555 { 1556 PetscFunctionBegin; 1557 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1558 if (reuse == MAT_INITIAL_MATRIX) { 1559 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1560 } else if (reuse == MAT_REUSE_MATRIX) { 1561 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1562 } 1563 PetscFunctionReturn(0); 1564 } 1565 1566 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1567 { 1568 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1569 Mat a = baij->A, b = baij->B; 1570 PetscInt nv, m, n; 1571 PetscBool flg; 1572 1573 PetscFunctionBegin; 1574 if (ll != rr) { 1575 PetscCall(VecEqual(ll, rr, &flg)); 1576 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1577 } 1578 if (!ll) PetscFunctionReturn(0); 1579 1580 PetscCall(MatGetLocalSize(mat, &m, &n)); 1581 PetscCheck(m == n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1582 1583 PetscCall(VecGetLocalSize(rr, &nv)); 1584 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1585 1586 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1587 1588 /* left diagonalscale the off-diagonal part */ 1589 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1590 1591 /* scale the diagonal part */ 1592 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1593 1594 /* right diagonalscale the off-diagonal part */ 1595 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1596 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1601 { 1602 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1603 1604 PetscFunctionBegin; 1605 PetscCall(MatSetUnfactored(a->A)); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1610 1611 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1612 { 1613 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1614 Mat a, b, c, d; 1615 PetscBool flg; 1616 1617 PetscFunctionBegin; 1618 a = matA->A; 1619 b = matA->B; 1620 c = matB->A; 1621 d = matB->B; 1622 1623 PetscCall(MatEqual(a, c, &flg)); 1624 if (flg) PetscCall(MatEqual(b, d, &flg)); 1625 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1626 PetscFunctionReturn(0); 1627 } 1628 1629 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1630 { 1631 PetscBool isbaij; 1632 1633 PetscFunctionBegin; 1634 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1635 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1636 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1637 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1638 PetscCall(MatGetRowUpperTriangular(A)); 1639 PetscCall(MatCopy_Basic(A, B, str)); 1640 PetscCall(MatRestoreRowUpperTriangular(A)); 1641 } else { 1642 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1643 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1644 1645 PetscCall(MatCopy(a->A, b->A, str)); 1646 PetscCall(MatCopy(a->B, b->B, str)); 1647 } 1648 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1649 PetscFunctionReturn(0); 1650 } 1651 1652 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1653 { 1654 PetscFunctionBegin; 1655 PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1656 PetscFunctionReturn(0); 1657 } 1658 1659 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1660 { 1661 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1662 PetscBLASInt bnz, one = 1; 1663 Mat_SeqSBAIJ *xa, *ya; 1664 Mat_SeqBAIJ *xb, *yb; 1665 1666 PetscFunctionBegin; 1667 if (str == SAME_NONZERO_PATTERN) { 1668 PetscScalar alpha = a; 1669 xa = (Mat_SeqSBAIJ *)xx->A->data; 1670 ya = (Mat_SeqSBAIJ *)yy->A->data; 1671 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1672 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1673 xb = (Mat_SeqBAIJ *)xx->B->data; 1674 yb = (Mat_SeqBAIJ *)yy->B->data; 1675 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1676 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1677 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1678 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1679 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1680 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1681 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1682 } else { 1683 Mat B; 1684 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1685 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1686 PetscCall(MatGetRowUpperTriangular(X)); 1687 PetscCall(MatGetRowUpperTriangular(Y)); 1688 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1689 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1690 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1691 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1692 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1693 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1694 PetscCall(MatSetType(B, MATMPISBAIJ)); 1695 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1696 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1697 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1698 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1699 PetscCall(MatHeaderMerge(Y, &B)); 1700 PetscCall(PetscFree(nnz_d)); 1701 PetscCall(PetscFree(nnz_o)); 1702 PetscCall(MatRestoreRowUpperTriangular(X)); 1703 PetscCall(MatRestoreRowUpperTriangular(Y)); 1704 } 1705 PetscFunctionReturn(0); 1706 } 1707 1708 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1709 { 1710 PetscInt i; 1711 PetscBool flg; 1712 1713 PetscFunctionBegin; 1714 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1715 for (i = 0; i < n; i++) { 1716 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1717 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1718 
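    /* Descriptive note (added): if irow[i] and icol[i] differ, the extracted block is not symmetric, so the SBAIJ operations that assume symmetry are disabled on that submatrix */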
} 1719 PetscFunctionReturn(0); 1720 } 1721 1722 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1723 { 1724 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1725 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1726 1727 PetscFunctionBegin; 1728 if (!Y->preallocated) { 1729 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1730 } else if (!aij->nz) { 1731 PetscInt nonew = aij->nonew; 1732 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1733 aij->nonew = nonew; 1734 } 1735 PetscCall(MatShift_Basic(Y, a)); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1740 { 1741 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1742 1743 PetscFunctionBegin; 1744 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1745 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1746 if (d) { 1747 PetscInt rstart; 1748 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1749 *d += rstart / A->rmap->bs; 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1755 { 1756 PetscFunctionBegin; 1757 *a = ((Mat_MPISBAIJ *)A->data)->A; 1758 PetscFunctionReturn(0); 1759 } 1760 1761 /* -------------------------------------------------------------------*/ 1762 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1763 MatGetRow_MPISBAIJ, 1764 MatRestoreRow_MPISBAIJ, 1765 MatMult_MPISBAIJ, 1766 /* 4*/ MatMultAdd_MPISBAIJ, 1767 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1768 MatMultAdd_MPISBAIJ, 1769 NULL, 1770 NULL, 1771 NULL, 1772 /* 10*/ NULL, 1773 NULL, 1774 NULL, 1775 MatSOR_MPISBAIJ, 1776 MatTranspose_MPISBAIJ, 1777 /* 15*/ MatGetInfo_MPISBAIJ, 1778 MatEqual_MPISBAIJ, 1779 MatGetDiagonal_MPISBAIJ, 1780 MatDiagonalScale_MPISBAIJ, 1781 MatNorm_MPISBAIJ, 1782 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1783 MatAssemblyEnd_MPISBAIJ, 1784 MatSetOption_MPISBAIJ, 1785 MatZeroEntries_MPISBAIJ, 1786 /* 24*/ NULL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 NULL, 1791 /* 29*/ MatSetUp_MPISBAIJ, 1792 NULL, 1793 NULL, 1794 MatGetDiagonalBlock_MPISBAIJ, 1795 NULL, 1796 /* 34*/ MatDuplicate_MPISBAIJ, 1797 NULL, 1798 NULL, 1799 NULL, 1800 NULL, 1801 /* 39*/ MatAXPY_MPISBAIJ, 1802 MatCreateSubMatrices_MPISBAIJ, 1803 MatIncreaseOverlap_MPISBAIJ, 1804 MatGetValues_MPISBAIJ, 1805 MatCopy_MPISBAIJ, 1806 /* 44*/ NULL, 1807 MatScale_MPISBAIJ, 1808 MatShift_MPISBAIJ, 1809 NULL, 1810 NULL, 1811 /* 49*/ NULL, 1812 NULL, 1813 NULL, 1814 NULL, 1815 NULL, 1816 /* 54*/ NULL, 1817 NULL, 1818 MatSetUnfactored_MPISBAIJ, 1819 NULL, 1820 MatSetValuesBlocked_MPISBAIJ, 1821 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1822 NULL, 1823 NULL, 1824 NULL, 1825 NULL, 1826 /* 64*/ NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 NULL, 1831 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1832 NULL, 1833 MatConvert_MPISBAIJ_Basic, 1834 NULL, 1835 NULL, 1836 /* 74*/ NULL, 1837 NULL, 1838 NULL, 1839 NULL, 1840 NULL, 1841 /* 79*/ NULL, 1842 NULL, 1843 NULL, 1844 NULL, 1845 MatLoad_MPISBAIJ, 1846 /* 84*/ NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 NULL, 1851 /* 89*/ NULL, 1852 NULL, 1853 NULL, 1854 NULL, 1855 NULL, 1856 /* 94*/ NULL, 1857 NULL, 1858 NULL, 1859 NULL, 1860 NULL, 1861 /* 99*/ NULL, 1862 NULL, 1863 NULL, 1864 MatConjugate_MPISBAIJ, 1865 NULL, 1866 /*104*/ NULL, 1867 MatRealPart_MPISBAIJ, 1868 MatImaginaryPart_MPISBAIJ, 1869 MatGetRowUpperTriangular_MPISBAIJ, 1870 MatRestoreRowUpperTriangular_MPISBAIJ, 1871 /*109*/ NULL, 1872 NULL, 1873 
NULL, 1874 NULL, 1875 MatMissingDiagonal_MPISBAIJ, 1876 /*114*/ NULL, 1877 NULL, 1878 NULL, 1879 NULL, 1880 NULL, 1881 /*119*/ NULL, 1882 NULL, 1883 NULL, 1884 NULL, 1885 NULL, 1886 /*124*/ NULL, 1887 NULL, 1888 NULL, 1889 NULL, 1890 NULL, 1891 /*129*/ NULL, 1892 NULL, 1893 NULL, 1894 NULL, 1895 NULL, 1896 /*134*/ NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 NULL, 1901 /*139*/ MatSetBlockSizes_Default, 1902 NULL, 1903 NULL, 1904 NULL, 1905 NULL, 1906 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1907 NULL, 1908 NULL, 1909 NULL, 1910 NULL, 1911 NULL, 1912 /*150*/ NULL}; 1913 1914 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1915 { 1916 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1917 PetscInt i, mbs, Mbs; 1918 PetscMPIInt size; 1919 1920 PetscFunctionBegin; 1921 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1922 PetscCall(PetscLayoutSetUp(B->rmap)); 1923 PetscCall(PetscLayoutSetUp(B->cmap)); 1924 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1925 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1926 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1927 1928 mbs = B->rmap->n / bs; 1929 Mbs = B->rmap->N / bs; 1930 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1931 1932 B->rmap->bs = bs; 1933 b->bs2 = bs * bs; 1934 b->mbs = mbs; 1935 b->Mbs = Mbs; 1936 b->nbs = B->cmap->n / bs; 1937 b->Nbs = B->cmap->N / bs; 1938 1939 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1940 b->rstartbs = B->rmap->rstart / bs; 1941 b->rendbs = B->rmap->rend / bs; 1942 1943 b->cstartbs = B->cmap->rstart / bs; 1944 b->cendbs = B->cmap->rend / bs; 1945 1946 #if defined(PETSC_USE_CTABLE) 1947 PetscCall(PetscTableDestroy(&b->colmap)); 1948 #else 1949 PetscCall(PetscFree(b->colmap)); 1950 #endif 1951 PetscCall(PetscFree(b->garray)); 1952 PetscCall(VecDestroy(&b->lvec)); 1953 PetscCall(VecScatterDestroy(&b->Mvctx)); 1954 PetscCall(VecDestroy(&b->slvec0)); 1955 PetscCall(VecDestroy(&b->slvec0b)); 1956 PetscCall(VecDestroy(&b->slvec1)); 1957 PetscCall(VecDestroy(&b->slvec1a)); 1958 PetscCall(VecDestroy(&b->slvec1b)); 1959 PetscCall(VecScatterDestroy(&b->sMvctx)); 1960 1961 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1962 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1963 PetscCall(MatDestroy(&b->B)); 1964 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1965 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 1966 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1967 1968 if (!B->preallocated) { 1969 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1970 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1971 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1972 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1973 } 1974 1975 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1976 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1977 1978 B->preallocated = PETSC_TRUE; 1979 B->was_assembled = PETSC_FALSE; 1980 B->assembled = PETSC_FALSE; 1981 PetscFunctionReturn(0); 1982 } 1983 1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1985 { 1986 PetscInt m, rstart, cend; 1987 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1988 const PetscInt *JJ = NULL; 1989 PetscScalar *values = NULL; 1990 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1991 PetscBool nooffprocentries; 1992 1993 PetscFunctionBegin; 1994 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1995 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1996 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1997 PetscCall(PetscLayoutSetUp(B->rmap)); 1998 PetscCall(PetscLayoutSetUp(B->cmap)); 1999 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2000 m = B->rmap->n / bs; 2001 rstart = B->rmap->rstart / bs; 2002 cend = B->cmap->rend / bs; 2003 2004 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2005 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2006 for (i = 0; i < m; i++) { 2007 nz = ii[i + 1] - ii[i]; 2008 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2009 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2010 JJ = jj + ii[i]; 2011 bd = 0; 2012 for (j = 0; j < nz; j++) { 2013 if (*JJ >= i + rstart) break; 2014 JJ++; 2015 bd++; 2016 } 2017 d = 0; 2018 for (; j < nz; j++) { 2019 if (*JJ++ >= cend) break; 2020 d++; 2021 } 2022 d_nnz[i] = d; 2023 o_nnz[i] = nz - d - bd; 2024 nz = nz - bd; 2025 nz_max = PetscMax(nz_max, nz); 2026 } 2027 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2028 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2029 PetscCall(PetscFree2(d_nnz, o_nnz)); 2030 2031 values = (PetscScalar *)V; 2032 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2033 for (i = 0; i < m; i++) { 2034 PetscInt row = i + rstart; 2035 PetscInt ncols = ii[i + 1] - ii[i]; 2036 const PetscInt *icols = jj + ii[i]; 2037 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2038 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2039 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2040 } else { /* block ordering does not match so we can only insert one block at a time. */ 2041 PetscInt j; 2042 for (j = 0; j < ncols; j++) { 2043 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2044 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2045 } 2046 } 2047 } 2048 2049 if (!V) PetscCall(PetscFree(values)); 2050 nooffprocentries = B->nooffprocentries; 2051 B->nooffprocentries = PETSC_TRUE; 2052 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2053 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2054 B->nooffprocentries = nooffprocentries; 2055 2056 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2057 PetscFunctionReturn(0); 2058 } 2059 2060 /*MC 2061 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2062 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2063 the matrix is stored. 2064 2065 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2066 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2067 2068 Options Database Keys: 2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2070 2071 Note: 2072 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2073 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2074 2075 Level: beginner 2076 2077 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2078 M*/ 2079 2080 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2081 { 2082 Mat_MPISBAIJ *b; 2083 PetscBool flg = PETSC_FALSE; 2084 2085 PetscFunctionBegin; 2086 PetscCall(PetscNew(&b)); 2087 B->data = (void *)b; 2088 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2089 2090 B->ops->destroy = MatDestroy_MPISBAIJ; 2091 B->ops->view = MatView_MPISBAIJ; 2092 B->assembled = PETSC_FALSE; 2093 B->insertmode = NOT_SET_VALUES; 2094 2095 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2096 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2097 2098 /* build local table of row and column ownerships */ 2099 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2100 2101 /* build cache for off array entries formed */ 2102 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2103 2104 b->donotstash = PETSC_FALSE; 2105 b->colmap = NULL; 2106 b->garray = NULL; 2107 b->roworiented = PETSC_TRUE; 2108 2109 /* stuff used in block assembly */ 2110 b->barray = NULL; 2111 2112 /* stuff used for matrix vector multiply */ 2113 b->lvec = NULL; 2114 b->Mvctx = NULL; 2115 b->slvec0 = NULL; 2116 b->slvec0b = NULL; 2117 b->slvec1 = NULL; 2118 b->slvec1a = NULL; 2119 b->slvec1b = NULL; 2120 b->sMvctx = NULL; 2121 2122 /* stuff for MatGetRow() */ 2123 b->rowindices = NULL; 2124 b->rowvalues = NULL; 2125 b->getrowactive = PETSC_FALSE; 2126 2127 /* hash table stuff */ 2128 b->ht = NULL; 2129 b->hd = NULL; 2130 b->ht_size = 0; 2131 b->ht_flag = PETSC_FALSE; 2132 b->ht_fact = 0; 2133 b->ht_total_ct = 0; 2134 b->ht_insert_ct = 0; 2135 2136 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2137 b->ijonly = PETSC_FALSE; 2138 2139 b->in_loc = NULL; 2140 b->v_loc = NULL; 2141 b->n_loc = 0; 2142 2143 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2144 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2145 
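  /* Descriptive note (added): the calls below compose the type-specific preallocation and conversion routines on the object so that the generic front ends (e.g. MatMPISBAIJSetPreallocation(), MatConvert()) can query and dispatch to them */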
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2146 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2147 #if defined(PETSC_HAVE_ELEMENTAL) 2148 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2149 #endif 2150 #if defined(PETSC_HAVE_SCALAPACK) 2151 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2152 #endif 2153 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2154 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2155 2156 B->symmetric = PETSC_BOOL3_TRUE; 2157 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2158 B->symmetry_eternal = PETSC_TRUE; 2159 B->structural_symmetry_eternal = PETSC_TRUE; 2160 #if defined(PETSC_USE_COMPLEX) 2161 B->hermitian = PETSC_BOOL3_FALSE; 2162 #else 2163 B->hermitian = PETSC_BOOL3_TRUE; 2164 #endif 2165 2166 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2167 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2168 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2169 if (flg) { 2170 PetscReal fact = 1.39; 2171 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2172 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2173 if (fact <= 1.0) fact = 1.39; 2174 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2175 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2176 } 2177 PetscOptionsEnd(); 2178 PetscFunctionReturn(0); 2179 } 2180 2181 /*MC 2182 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2183 2184 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2185 and `MATMPISBAIJ` otherwise. 2186 2187 Options Database Key: 2188 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions() 2189 2190 Level: beginner 2191 2192 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2193 M*/ 2194 2195 /*@C 2196 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2197 the user should preallocate the matrix storage by setting the parameters 2198 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2199 performance can be increased by more than a factor of 50. 2200 2201 Collective 2202 2203 Input Parameters: 2204 + B - the matrix 2205 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2206 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2207 . d_nz - number of block nonzeros per block row in diagonal portion of local 2208 submatrix (same for all local rows) 2209 . d_nnz - array containing the number of block nonzeros in the various block rows 2210 in the upper triangular and diagonal part of the in diagonal portion of the local 2211 (possibly different for each block row) or NULL. 
If you plan to factor the matrix you must leave room 2212 for the diagonal entry and set a value even if it is zero. 2213 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2214 submatrix (same for all local rows). 2215 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2216 off-diagonal portion of the local submatrix that is right of the diagonal 2217 (possibly different for each block row) or NULL. 2218 2219 Options Database Keys: 2220 + -mat_no_unroll - uses code that does not unroll the loops in the 2221 block calculations (much slower) 2222 - -mat_block_size - size of the blocks to use 2223 2224 Notes: 2225 2226 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2227 then it must be used on all processors that share the object for that argument. 2228 2229 If the *_nnz parameter is given then the *_nz parameter is ignored. 2230 2231 Storage Information: 2232 For a square global matrix we define each processor's diagonal portion 2233 to be its local rows and the corresponding columns (a square submatrix); 2234 each processor's off-diagonal portion encompasses the remainder of the 2235 local matrix (a rectangular submatrix). 2236 2237 The user can specify preallocated storage for the diagonal part of 2238 the local submatrix with either d_nz or d_nnz (not both). Set 2239 d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2240 memory allocation. Likewise, specify preallocated storage for the 2241 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2242 2243 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2244 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 2245 You can also run with the option -info and look for messages with the string 2246 malloc in them to see if additional memory allocation was needed. 2247 2248 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2249 the figure below we depict these three local rows and all columns (0-11). 2250 2251 .vb 2252 0 1 2 3 4 5 6 7 8 9 10 11 2253 -------------------------- 2254 row 3 |. . . d d d o o o o o o 2255 row 4 |. . . d d d o o o o o o 2256 row 5 |. . . d d d o o o o o o 2257 -------------------------- 2258 .ve 2259 2260 Thus, any entries in the d locations are stored in the d (diagonal) 2261 submatrix, and any entries in the o locations are stored in the 2262 o (off-diagonal) submatrix. Note that the d matrix is stored in 2263 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2264 2265 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2266 plus the diagonal part of the d matrix, 2267 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2268 2269 In general, for PDE problems in which most nonzeros are near the diagonal, 2270 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2271 or you will get TERRIBLE performance; see the users' manual chapter on 2272 matrices.
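   Example (an illustrative sketch only, assuming an MPI communicator comm; the local size and the per-row counts are made-up values for an interior process of a 1-D Laplacian-type pattern with bs = 1):
.vb
   Mat      A;
   PetscInt m        = 4;               /* local rows on this process */
   PetscInt d_nnz[4] = {2, 2, 2, 1};    /* upper-triangular + diagonal entries kept in the diagonal block */
   PetscInt o_nnz[4] = {0, 0, 0, 1};    /* the last local row couples to the next process */

   MatCreate(comm, &A);
   MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, 1, 0, d_nnz, 0, o_nnz);
   /* ... MatSetValues()/MatSetValuesBlocked() on the upper triangular part, then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve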
2273 2274 Level: intermediate 2275 2276 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2277 @*/ 2278 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2279 { 2280 PetscFunctionBegin; 2281 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2282 PetscValidType(B, 1); 2283 PetscValidLogicalCollectiveInt(B, bs, 2); 2284 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2285 PetscFunctionReturn(0); 2286 } 2287 2288 /*@C 2289 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ` 2290 (block compressed row). For good matrix assembly performance 2291 the user should preallocate the matrix storage by setting the parameters 2292 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2293 performance can be increased by more than a factor of 50. 2294 2295 Collective 2296 2297 Input Parameters: 2298 + comm - MPI communicator 2299 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2300 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2301 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2302 This value should be the same as the local size used in creating the 2303 y vector for the matrix-vector product y = Ax. 2304 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2305 This value should be the same as the local size used in creating the 2306 x vector for the matrix-vector product y = Ax. 2307 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2308 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2309 . d_nz - number of block nonzeros per block row in diagonal portion of local 2310 submatrix (same for all local rows) 2311 . d_nnz - array containing the number of block nonzeros in the various block rows 2312 in the upper triangular portion of the diagonal portion of the local submatrix 2313 (possibly different for each block row) or NULL. 2314 If you plan to factor the matrix you must leave room for the diagonal entry and 2315 set its value even if it is zero. 2316 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2317 submatrix (same for all local rows). 2318 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2319 off-diagonal portion of the local submatrix (possibly different for 2320 each block row) or NULL. 2321 2322 Output Parameter: 2323 . A - the matrix 2324 2325 Options Database Keys: 2326 + -mat_no_unroll - uses code that does not unroll the loops in the 2327 block calculations (much slower) 2328 . -mat_block_size - size of the blocks to use 2329 - -mat_mpi - use the parallel matrix data structures even on one processor 2330 (defaults to using `MATSEQSBAIJ` format on one processor) 2331 2332 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2333 MatXXXXSetPreallocation() paradigm instead of this routine directly.
2334 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2335 2336 Notes: 2337 The number of rows and columns must be divisible by blocksize. 2338 This matrix type does not support complex Hermitian operations. 2339 2340 The user MUST specify either the local or global matrix dimensions 2341 (possibly both). 2342 2343 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2344 then it must be used on all processors that share the object for that argument. 2345 2346 If the *_nnz parameter is given then the *_nz parameter is ignored. 2347 2348 Storage Information: 2349 For a square global matrix we define each processor's diagonal portion 2350 to be its local rows and the corresponding columns (a square submatrix); 2351 each processor's off-diagonal portion encompasses the remainder of the 2352 local matrix (a rectangular submatrix). 2353 2354 The user can specify preallocated storage for the diagonal part of 2355 the local submatrix with either d_nz or d_nnz (not both). Set 2356 d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic 2357 memory allocation. Likewise, specify preallocated storage for the 2358 off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2359 2360 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2361 the figure below we depict these three local rows and all columns (0-11). 2362 2363 .vb 2364 0 1 2 3 4 5 6 7 8 9 10 11 2365 -------------------------- 2366 row 3 |. . . d d d o o o o o o 2367 row 4 |. . . d d d o o o o o o 2368 row 5 |. . . d d d o o o o o o 2369 -------------------------- 2370 .ve 2371 2372 Thus, any entries in the d locations are stored in the d (diagonal) 2373 submatrix, and any entries in the o locations are stored in the 2374 o (off-diagonal) submatrix. Note that the d matrix is stored in 2375 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2376 2377 Now d_nz should indicate the number of block nonzeros per row in the upper triangular 2378 plus the diagonal part of the d matrix, 2379 and o_nz should indicate the number of block nonzeros per row in the o matrix. 2380 In general, for PDE problems in which most nonzeros are near the diagonal, 2381 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2382 or you will get TERRIBLE performance; see the users' manual chapter on 2383 matrices.
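   Example (a minimal sketch, assuming an MPI communicator comm; the block size, local size, and per-row estimates are made-up values and error checking is omitted):
.vb
   Mat      A;
   PetscInt bs = 2, m = 6;   /* m must be divisible by bs */

   MatCreateSBAIJ(comm, bs, m, m, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 1, NULL, &A);
   /* insert only the upper triangular (block) part, then assemble */
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve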
2384 2385 Level: intermediate 2386 2387 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2388 @*/ 2389 2390 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2391 { 2392 PetscMPIInt size; 2393 2394 PetscFunctionBegin; 2395 PetscCall(MatCreate(comm, A)); 2396 PetscCall(MatSetSizes(*A, m, n, M, N)); 2397 PetscCallMPI(MPI_Comm_size(comm, &size)); 2398 if (size > 1) { 2399 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2400 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2401 } else { 2402 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2403 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2404 } 2405 PetscFunctionReturn(0); 2406 } 2407 2408 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2409 { 2410 Mat mat; 2411 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2412 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2413 PetscScalar *array; 2414 2415 PetscFunctionBegin; 2416 *newmat = NULL; 2417 2418 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2419 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2420 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2421 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2422 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2423 2424 mat->factortype = matin->factortype; 2425 mat->preallocated = PETSC_TRUE; 2426 mat->assembled = PETSC_TRUE; 2427 mat->insertmode = NOT_SET_VALUES; 2428 2429 a = (Mat_MPISBAIJ *)mat->data; 2430 a->bs2 = oldmat->bs2; 2431 a->mbs = oldmat->mbs; 2432 a->nbs = oldmat->nbs; 2433 a->Mbs = oldmat->Mbs; 2434 a->Nbs = oldmat->Nbs; 2435 2436 a->size = oldmat->size; 2437 a->rank = oldmat->rank; 2438 a->donotstash = oldmat->donotstash; 2439 a->roworiented = oldmat->roworiented; 2440 a->rowindices = NULL; 2441 a->rowvalues = NULL; 2442 a->getrowactive = PETSC_FALSE; 2443 a->barray = NULL; 2444 a->rstartbs = oldmat->rstartbs; 2445 a->rendbs = oldmat->rendbs; 2446 a->cstartbs = oldmat->cstartbs; 2447 a->cendbs = oldmat->cendbs; 2448 2449 /* hash table stuff */ 2450 a->ht = NULL; 2451 a->hd = NULL; 2452 a->ht_size = 0; 2453 a->ht_flag = oldmat->ht_flag; 2454 a->ht_fact = oldmat->ht_fact; 2455 a->ht_total_ct = 0; 2456 a->ht_insert_ct = 0; 2457 2458 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2459 if (oldmat->colmap) { 2460 #if defined(PETSC_USE_CTABLE) 2461 PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap)); 2462 #else 2463 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2464 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2465 #endif 2466 } else a->colmap = NULL; 2467 2468 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 2469 PetscCall(PetscMalloc1(len, &a->garray)); 2470 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2471 } else a->garray = NULL; 2472 2473 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2474 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2475 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2476 2477 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2478 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2479 2480 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2481 PetscCall(VecGetArray(a->slvec1, &array)); 2482 
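  /* Descriptive note (added): slvec1a (the local part, the first bs*mbs entries) and slvec1b (the ghost part, the remaining nt - bs*mbs entries) are sequential vectors that alias slvec1's storage; slvec0b aliases the tail of slvec0 in the same way */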
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2483 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2484 PetscCall(VecRestoreArray(a->slvec1, &array)); 2485 PetscCall(VecGetArray(a->slvec0, &array)); 2486 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2487 PetscCall(VecRestoreArray(a->slvec0, &array)); 2488 2489 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2490 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2491 a->sMvctx = oldmat->sMvctx; 2492 2493 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2494 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2495 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2496 *newmat = mat; 2497 PetscFunctionReturn(0); 2498 } 2499 2500 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2501 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2502 2503 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2504 { 2505 PetscBool isbinary; 2506 2507 PetscFunctionBegin; 2508 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2509 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2510 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2511 PetscFunctionReturn(0); 2512 } 2513 2514 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2515 { 2516 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2517 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2518 PetscReal atmp; 2519 PetscReal *work, *svalues, *rvalues; 2520 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2521 PetscMPIInt rank, size; 2522 PetscInt *rowners_bs, dest, count, source; 2523 PetscScalar *va; 2524 MatScalar *ba; 2525 MPI_Status stat; 2526 2527 PetscFunctionBegin; 2528 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2529 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2530 PetscCall(VecGetArray(v, &va)); 2531 2532 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2533 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2534 2535 bs = A->rmap->bs; 2536 mbs = a->mbs; 2537 Mbs = a->Mbs; 2538 ba = b->a; 2539 bi = b->i; 2540 bj = b->j; 2541 2542 /* find ownerships */ 2543 rowners_bs = A->rmap->range; 2544 2545 /* each proc creates an array to be distributed */ 2546 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2547 2548 /* row_max for B */ 2549 if (rank != size - 1) { 2550 for (i = 0; i < mbs; i++) { 2551 ncols = bi[1] - bi[0]; 2552 bi++; 2553 brow = bs * i; 2554 for (j = 0; j < ncols; j++) { 2555 bcol = bs * (*bj); 2556 for (kcol = 0; kcol < bs; kcol++) { 2557 col = bcol + kcol; /* local col index */ 2558 col += rowners_bs[rank + 1]; /* global col index */ 2559 for (krow = 0; krow < bs; krow++) { 2560 atmp = PetscAbsScalar(*ba); 2561 ba++; 2562 row = brow + krow; /* local row index */ 2563 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2564 if (work[col] < atmp) work[col] = atmp; 2565 } 2566 } 2567 bj++; 2568 } 2569 } 2570 2571 /* send values to its owners */ 2572 for (dest = rank + 1; dest < size; dest++) { 2573 svalues = work + rowners_bs[dest]; 2574 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2575 PetscCallMPI(MPI_Send(svalues, 
count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2576 } 2577 } 2578 2579 /* receive values */ 2580 if (rank) { 2581 rvalues = work; 2582 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2583 for (source = 0; source < rank; source++) { 2584 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2585 /* process values */ 2586 for (i = 0; i < count; i++) { 2587 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2588 } 2589 } 2590 } 2591 2592 PetscCall(VecRestoreArray(v, &va)); 2593 PetscCall(PetscFree(work)); 2594 PetscFunctionReturn(0); 2595 } 2596 2597 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2598 { 2599 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2600 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2601 PetscScalar *x, *ptr, *from; 2602 Vec bb1; 2603 const PetscScalar *b; 2604 2605 PetscFunctionBegin; 2606 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2607 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2608 2609 if (flag == SOR_APPLY_UPPER) { 2610 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2611 PetscFunctionReturn(0); 2612 } 2613 2614 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2615 if (flag & SOR_ZERO_INITIAL_GUESS) { 2616 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2617 its--; 2618 } 2619 2620 PetscCall(VecDuplicate(bb, &bb1)); 2621 while (its--) { 2622 /* lower triangular part: slvec0b = - B^T*xx */ 2623 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2624 2625 /* copy xx into slvec0a */ 2626 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2627 PetscCall(VecGetArray(xx, &x)); 2628 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2629 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2630 2631 PetscCall(VecScale(mat->slvec0, -1.0)); 2632 2633 /* copy bb into slvec1a */ 2634 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2635 PetscCall(VecGetArrayRead(bb, &b)); 2636 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2637 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2638 2639 /* set slvec1b = 0 */ 2640 PetscCall(VecSet(mat->slvec1b, 0.0)); 2641 2642 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2643 PetscCall(VecRestoreArray(xx, &x)); 2644 PetscCall(VecRestoreArrayRead(bb, &b)); 2645 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2646 2647 /* upper triangular part: bb1 = bb1 - B*x */ 2648 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2649 2650 /* local diagonal sweep */ 2651 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2652 } 2653 PetscCall(VecDestroy(&bb1)); 2654 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2655 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2656 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2657 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2658 } else if (flag & SOR_EISENSTAT) { 2659 Vec xx1; 2660 PetscBool hasop; 2661 const PetscScalar *diag; 2662 PetscScalar 
*sl, scale = (omega - 2.0) / omega; 2663 PetscInt i, n; 2664 2665 if (!mat->xx1) { 2666 PetscCall(VecDuplicate(bb, &mat->xx1)); 2667 PetscCall(VecDuplicate(bb, &mat->bb1)); 2668 } 2669 xx1 = mat->xx1; 2670 bb1 = mat->bb1; 2671 2672 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2673 2674 if (!mat->diag) { 2675 /* this is wrong for same matrix with new nonzero values */ 2676 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2677 PetscCall(MatGetDiagonal(matin, mat->diag)); 2678 } 2679 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2680 2681 if (hasop) { 2682 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2683 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2684 } else { 2685 /* 2686 These two lines are replaced by code that may be a bit faster for a good compiler 2687 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2688 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2689 */ 2690 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2691 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2692 PetscCall(VecGetArrayRead(bb, &b)); 2693 PetscCall(VecGetArray(xx, &x)); 2694 PetscCall(VecGetLocalSize(xx, &n)); 2695 if (omega == 1.0) { 2696 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2697 PetscCall(PetscLogFlops(2.0 * n)); 2698 } else { 2699 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2700 PetscCall(PetscLogFlops(3.0 * n)); 2701 } 2702 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2703 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2704 PetscCall(VecRestoreArrayRead(bb, &b)); 2705 PetscCall(VecRestoreArray(xx, &x)); 2706 } 2707 2708 /* multiply off-diagonal portion of matrix */ 2709 PetscCall(VecSet(mat->slvec1b, 0.0)); 2710 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2711 PetscCall(VecGetArray(mat->slvec0, &from)); 2712 PetscCall(VecGetArray(xx, &x)); 2713 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2714 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2715 PetscCall(VecRestoreArray(xx, &x)); 2716 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2717 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2718 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2719 2720 /* local sweep */ 2721 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2722 PetscCall(VecAXPY(xx, 1.0, xx1)); 2723 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2724 PetscFunctionReturn(0); 2725 } 2726 2727 /*@ 2728 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2729 CSR format the local rows. 2730 2731 Collective 2732 2733 Input Parameters: 2734 + comm - MPI communicator 2735 . bs - the block size, only a block size of 1 is supported 2736 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2737 . n - This value should be the same as the local size used in creating the 2738 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2739 calculated if N is given) For square matrices n is almost always m. 2740 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2741 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2742 . 
i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix 2743 . j - column indices 2744 - a - matrix values 2745 2746 Output Parameter: 2747 . mat - the matrix 2748 2749 Level: intermediate 2750 2751 Notes: 2752 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 2753 thus you CANNOT change the matrix entries by changing the values of a[] after you have 2754 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 2755 2756 The i and j indices are 0 based, and the i indices are offsets into the local j array. 2757 2758 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2759 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2760 @*/ 2761 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2762 { 2763 PetscFunctionBegin; 2764 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2765 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2766 PetscCall(MatCreate(comm, mat)); 2767 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2768 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2769 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2770 PetscFunctionReturn(0); 2771 }
j - the column indices for each local row (starts with zero); these must be sorted for each row 2783 - v - optional values in the matrix 2784 2785 Level: advanced 2786 2787 Notes: 2788 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries 2789 and usually the numerical values as well. 2790 2791 Any entries below the diagonal are ignored. 2792 2793 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2794 @*/ 2795 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2796 { 2797 PetscFunctionBegin; 2798 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2803 { 2804 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2805 PetscInt *indx; 2806 PetscScalar *values; 2807 2808 PetscFunctionBegin; 2809 PetscCall(MatGetSize(inmat, &m, &N)); 2810 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2811 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2812 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2813 PetscInt *bindx, rmax = a->rmax, j; 2814 PetscMPIInt rank, size; 2815 2816 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2817 mbs = m / bs; 2818 Nbs = N / cbs; 2819 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2820 nbs = n / cbs; 2821 2822 PetscCall(PetscMalloc1(rmax, &bindx)); 2823 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2824 2825 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2826 PetscCallMPI(MPI_Comm_size(comm, &size)); 2827 if (rank == size - 1) { 2828 /* Check sum(nbs) = Nbs */ 2829 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2830 } 2831 2832 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2833 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2834 for (i = 0; i < mbs; i++) { 2835 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2836 nnz = nnz / bs; 2837 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2838 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2839 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2840 } 2841 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2842 PetscCall(PetscFree(bindx)); 2843 2844 PetscCall(MatCreate(comm, outmat)); 2845 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2846 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2847 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2848 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2849 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2850 MatPreallocateEnd(dnz, onz); 2851 } 2852 2853 /* numeric phase */ 2854 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2855 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2856 2857 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2858 for (i = 0; i < m; i++) { 2859 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2860 Ii = i + rstart; 2861
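    /* Descriptive note (added): Ii is the global row index of local row i of inmat in the concatenated matrix; only the upper triangular part of the row is returned because MAT_GETROW_UPPERTRIANGULAR was set above */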
PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2862 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2863 } 2864 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2865 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2866 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2867 PetscFunctionReturn(0); 2868 } 2869
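
/*
   Illustrative usage sketch (not part of the library, kept as a comment): assemble a small
   MPISBAIJ matrix with a 1-D Laplacian-like pattern and apply it to a vector. The helper
   name, the sizes, and the preallocation estimates below are assumptions made for the example.

   static PetscErrorCode BuildAndApplySBAIJExample(MPI_Comm comm)
   {
     Mat      A;
     Vec      x, y;
     PetscInt m = 4, M, rstart, rend, row;

     PetscFunctionBegin;
     PetscCall(MatCreateSBAIJ(comm, 1, m, m, PETSC_DETERMINE, PETSC_DETERMINE, 2, NULL, 1, NULL, &A));
     PetscCall(MatGetSize(A, &M, NULL));
     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (row = rstart; row < rend; row++) {
       PetscInt    ncols   = (row + 1 < M) ? 2 : 1;   // only the upper triangular part is inserted
       PetscInt    cols[2] = {row, row + 1};
       PetscScalar vals[2] = {2.0, -1.0};
       PetscCall(MatSetValues(A, 1, &row, ncols, cols, vals, INSERT_VALUES));
     }
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatCreateVecs(A, &x, &y));
     PetscCall(VecSet(x, 1.0));
     PetscCall(MatMult(A, x, y));   // the stored upper triangle implies the full symmetric operator
     PetscCall(VecDestroy(&x));
     PetscCall(VecDestroy(&y));
     PetscCall(MatDestroy(&A));
     PetscFunctionReturn(0);
   }
*/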