#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
/*
  Preallocate B using the union of the nonzero patterns of the nm matrices X[].
  If symm[i] is set, the transposed locations of X[i] are preallocated as well
  (used when X[i] stores only an upper triangle). All X[i] must share B's row layout.
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  /* a MATPREALLOCATOR records the insertion pattern without storing values */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      /* for symmetric input also reserve the mirrored (transposed) entries */
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}

/*
  Fallback conversion from MPISBAIJ: expands the symmetric (upper-triangle-only)
  storage into a full matrix of type newtype, one row at a time.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      /* sparse target: preallocate for both the stored upper triangle and its transpose */
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      /* mirror with conjugation for Hermitian matrices */
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols,
      row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/* Save the current numerical values of both the diagonal (A) and off-diagonal (B) parts */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the values previously saved with MatStoreValues_MPISBAIJ() */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
  Insert/add the scalar `value` at point (row,col) of the local diagonal block A
  (SeqSBAIJ storage). Binary search over the block row followed by a short linear
  scan; if the block column is absent the row is grown (unless a->nonew forbids it).
  Relies on the local variables declared in MatSetValues_MPISBAIJ().
*/
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

/*
  Same as MatSetValues_SeqSBAIJ_A_Private() but for the off-diagonal block B
  (SeqBAIJ storage). Relies on the local variables declared in MatSetValues_MPISBAIJ().
*/
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks.
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch buffers used for stashing off-process rows */
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* colmap maps global block columns to compacted local B columns */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              /* a new off-diagonal block column: fall back to global (unassembled) numbering */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        /* buffer the row for communication to the owning process at assembly time */
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  Insert/add one bs-by-bs block into the diagonal (SeqSBAIJ) part at block
  position (row,col). Only the upper triangle is stored, so col < row is either
  ignored or raises an error depending on ignore_ltriangular.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search narrows the range; a short linear scan finishes it */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: update in place, transposing when row-oriented */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
  This routine is exactly
  duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  /* binary search narrows the range; a short linear scan finishes it */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: update in place, transposing when row-oriented */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij   = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the bs x bs scratch block used for reordering input values */
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,
                       PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* NOTE(review): baij->A holds SBAIJ data, yet it is cast to Mat_SeqBAIJ here while
               MatSetValues_MPISBAIJ casts it to Mat_SeqSBAIJ for the same nonew check — confirm
               the two structs agree on the nonew field offset */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        /* stash the whole block row for delivery to its owner at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Fetch values at the given global indices; only locally-owned rows are supported */
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0,
&data)); 626 data--; 627 #else 628 data = baij->colmap[idxn[j] / bs] - 1; 629 #endif 630 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 631 else { 632 col = data + idxn[j] % bs; 633 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 634 } 635 } 636 } 637 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 638 } 639 PetscFunctionReturn(0); 640 } 641 642 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 643 { 644 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 645 PetscReal sum[2], *lnorm2; 646 647 PetscFunctionBegin; 648 if (baij->size == 1) { 649 PetscCall(MatNorm(baij->A, type, norm)); 650 } else { 651 if (type == NORM_FROBENIUS) { 652 PetscCall(PetscMalloc1(2, &lnorm2)); 653 PetscCall(MatNorm(baij->A, type, lnorm2)); 654 *lnorm2 = (*lnorm2) * (*lnorm2); 655 lnorm2++; /* squar power of norm(A) */ 656 PetscCall(MatNorm(baij->B, type, lnorm2)); 657 *lnorm2 = (*lnorm2) * (*lnorm2); 658 lnorm2--; /* squar power of norm(B) */ 659 PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 660 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 661 PetscCall(PetscFree(lnorm2)); 662 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 663 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 664 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 665 PetscReal *rsum, *rsum2, vabs; 666 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 667 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 668 MatScalar *v; 669 670 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 671 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 672 /* Amat */ 673 v = amat->a; 674 jj = amat->j; 675 for (brow = 0; brow < mbs; brow++) { 676 grow = bs * (rstart + brow); 677 nz = amat->i[brow + 1] - amat->i[brow]; 678 for (bcol = 0; bcol < nz; bcol++) { 679 gcol = bs * 
(rstart + *jj); 680 jj++; 681 for (col = 0; col < bs; col++) { 682 for (row = 0; row < bs; row++) { 683 vabs = PetscAbsScalar(*v); 684 v++; 685 rsum[gcol + col] += vabs; 686 /* non-diagonal block */ 687 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 688 } 689 } 690 } 691 PetscCall(PetscLogFlops(nz * bs * bs)); 692 } 693 /* Bmat */ 694 v = bmat->a; 695 jj = bmat->j; 696 for (brow = 0; brow < mbs; brow++) { 697 grow = bs * (rstart + brow); 698 nz = bmat->i[brow + 1] - bmat->i[brow]; 699 for (bcol = 0; bcol < nz; bcol++) { 700 gcol = bs * garray[*jj]; 701 jj++; 702 for (col = 0; col < bs; col++) { 703 for (row = 0; row < bs; row++) { 704 vabs = PetscAbsScalar(*v); 705 v++; 706 rsum[gcol + col] += vabs; 707 rsum[grow + row] += vabs; 708 } 709 } 710 } 711 PetscCall(PetscLogFlops(nz * bs * bs)); 712 } 713 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 714 *norm = 0.0; 715 for (col = 0; col < mat->cmap->N; col++) { 716 if (rsum2[col] > *norm) *norm = rsum2[col]; 717 } 718 PetscCall(PetscFree2(rsum, rsum2)); 719 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 725 { 726 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 727 PetscInt nstash, reallocs; 728 729 PetscFunctionBegin; 730 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 731 732 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 733 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 734 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 735 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 736 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 737 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" 
            PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/*
  Complete the assembly: drain both stashes into local MatSetValues calls,
  then assemble the diagonal (A) and off-diagonal (B) parts.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt     *row, *col;
  PetscBool     other_disassembled;
  PetscMPIInt   n;
  PetscBool     r1, r2, r3;
  MatScalar    *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
/*
  ASCII/draw/socket viewer for MPISBAIJ. The info formats print per-process
  summaries; otherwise the full matrix is gathered onto rank 0 and viewed there.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format ==
        PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps compacted local block columns of B back to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 952 } 953 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 954 PetscCall(PetscViewerFlush(viewer)); 955 PetscCall(MatDestroy(&A)); 956 } 957 PetscFunctionReturn(0); 958 } 959 960 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 961 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 962 963 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 964 { 965 PetscBool iascii, isdraw, issocket, isbinary; 966 967 PetscFunctionBegin; 968 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 969 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 970 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 971 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 972 if (iascii || isdraw || issocket) { 973 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 974 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 975 PetscFunctionReturn(0); 976 } 977 978 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 979 { 980 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 981 982 PetscFunctionBegin; 983 #if defined(PETSC_USE_LOG) 984 PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 985 #endif 986 PetscCall(MatStashDestroy_Private(&mat->stash)); 987 PetscCall(MatStashDestroy_Private(&mat->bstash)); 988 PetscCall(MatDestroy(&baij->A)); 989 PetscCall(MatDestroy(&baij->B)); 990 #if defined(PETSC_USE_CTABLE) 991 PetscCall(PetscHMapIDestroy(&baij->colmap)); 992 #else 993 PetscCall(PetscFree(baij->colmap)); 994 #endif 995 PetscCall(PetscFree(baij->garray)); 996 PetscCall(VecDestroy(&baij->lvec)); 997 PetscCall(VecScatterDestroy(&baij->Mvctx)); 998 PetscCall(VecDestroy(&baij->slvec0)); 999 PetscCall(VecDestroy(&baij->slvec0b)); 1000 PetscCall(VecDestroy(&baij->slvec1)); 1001 
  /* continuation of MatDestroy_MPISBAIJ(): free the split-vector work objects used by
     the symmetric mat-vec kernels, then unregister all composed query functions */
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(0);
}

/*
   yy = A*xx for the Hermitian case: the local diagonal block a->A is applied directly;
   the off-diagonal block a->B contributes both as B (multadd at the end) and as its
   conjugate transpose (multhermitiantranspose into slvec0b), with the remote
   contributions combined through the sMvctx scatter with ADD_VALUES.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part: requires the off-diagonal block type to support B^H * x */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the local (leading) part of the combined vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* accumulate the off-process contributions */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

/*
   yy = A*xx for the symmetric (non-Hermitian) case; identical structure to the
   Hermitian variant above but the off-diagonal block is applied as B^T.
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the local (leading) part of the combined vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(0);
}

/*
   zz = yy + A*xx, Hermitian case; same scatter pattern as MatMult_MPISBAIJ_Hermitian
   except the diagonal block uses multadd so yy is folded in up front.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

/*
   zz = yy + A*xx, symmetric case; off-diagonal block applied as B^T.
*/
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs *
  mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

/* Scale both the diagonal (A) and off-diagonal (B) local blocks by aa */
PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/*
   Returns one locally-owned row (global index `row`) by merging the rows of the
   diagonal block A and off-diagonal block B into globally-indexed, column-sorted
   output arrays held in mat->rowvalues / mat->rowindices (lazily allocated to the
   longest local row). Must be paired with MatRestoreRow_MPISBAIJ().
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block-column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPISBAIJ(): only clears the re-entrancy guard */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Flag the diagonal block so MatGetRow() returns the (stored) upper triangle */
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Complex-conjugate all stored entries; a no-op for real scalars */
PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(0);
}

/* Check if
   isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) {
    /* more rows than candidate columns: cannot be a subset */
    *flg = PETSC_FALSE;
    PetscFunctionReturn(0);
  }

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  /* sort copies so the subset test can be done with a single forward sweep */
  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  /* count how many entries of a1 appear in a2; k never rewinds since both are sorted */
  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}

/*
   Extracts a (symmetric) submatrix: for SBAIJ the column IS must equal the row IS
   and be sorted; the gathered sequential column IS is cached on the new matrix
   under "ISAllGather" so MAT_REUSE_MATRIX calls can retrieve it.
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
   Sums the statistics of the two local blocks, then (for the GLOBAL flavors)
   reduces across the communicator with MAX or SUM.
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Forwards most options to the local blocks; MAT_HERMITIAN/MAT_SYMMETRIC also swap
   the mat-vec function pointers in the complex case (see the assignments below) */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) {
      /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this also writes ignore_ltriangular, same as the case above —
       looks like it may have been meant to set a separate "error" flag; confirm */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* A symmetric matrix equals its transpose, so this is duplicate/copy */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(0);
}

/*
   Symmetric scaling D*A*D: requires ll == rr (checked with VecEqual). The diagonal
   block is scaled on both sides; the off-diagonal block is scaled by ll on the left
   and, after the scatter of rr into lvec completes, by the ghosted rr on the right.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;
  PetscBool     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(0);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

/* Equality requires both local blocks to match on every rank (MPI_LAND reduce) */
PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
  Mat           a, b, c, d;
  PetscBool     flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Fast block-wise copy when patterns and implementations match, otherwise the
   generic row-by-row MatCopy_Basic() guarded by GetRowUpperTriangular */
PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
  PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatGetRowUpperTriangular(A));
    PetscCall(MatCopy_Basic(A, B, str));
    PetscCall(MatRestoreRowUpperTriangular(A));
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;

    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
   Y += a*X. SAME_NONZERO_PATTERN uses direct BLAS axpy on the stored block values;
   SUBSET_NONZERO_PATTERN falls back to MatAXPY_Basic(); otherwise a new matrix with
   the merged pattern is preallocated and header-merged back into Y.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A,
n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1715 for (i = 0; i < n; i++) { 1716 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1717 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1718 } 1719 PetscFunctionReturn(0); 1720 } 1721 1722 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1723 { 1724 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1725 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1726 1727 PetscFunctionBegin; 1728 if (!Y->preallocated) { 1729 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1730 } else if (!aij->nz) { 1731 PetscInt nonew = aij->nonew; 1732 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1733 aij->nonew = nonew; 1734 } 1735 PetscCall(MatShift_Basic(Y, a)); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1740 { 1741 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1742 1743 PetscFunctionBegin; 1744 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1745 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1746 if (d) { 1747 PetscInt rstart; 1748 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1749 *d += rstart / A->rmap->bs; 1750 } 1751 PetscFunctionReturn(0); 1752 } 1753 1754 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1755 { 1756 PetscFunctionBegin; 1757 *a = ((Mat_MPISBAIJ *)A->data)->A; 1758 PetscFunctionReturn(0); 1759 } 1760 1761 /* -------------------------------------------------------------------*/ 1762 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1763 MatGetRow_MPISBAIJ, 1764 MatRestoreRow_MPISBAIJ, 1765 MatMult_MPISBAIJ, 1766 /* 4*/ MatMultAdd_MPISBAIJ, 1767 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1768 MatMultAdd_MPISBAIJ, 1769 NULL, 1770 NULL, 1771 NULL, 1772 /* 10*/ NULL, 1773 NULL, 1774 NULL, 1775 MatSOR_MPISBAIJ, 1776 MatTranspose_MPISBAIJ, 1777 /* 15*/ 
MatGetInfo_MPISBAIJ, 1778 MatEqual_MPISBAIJ, 1779 MatGetDiagonal_MPISBAIJ, 1780 MatDiagonalScale_MPISBAIJ, 1781 MatNorm_MPISBAIJ, 1782 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1783 MatAssemblyEnd_MPISBAIJ, 1784 MatSetOption_MPISBAIJ, 1785 MatZeroEntries_MPISBAIJ, 1786 /* 24*/ NULL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 NULL, 1791 /* 29*/ MatSetUp_MPISBAIJ, 1792 NULL, 1793 NULL, 1794 MatGetDiagonalBlock_MPISBAIJ, 1795 NULL, 1796 /* 34*/ MatDuplicate_MPISBAIJ, 1797 NULL, 1798 NULL, 1799 NULL, 1800 NULL, 1801 /* 39*/ MatAXPY_MPISBAIJ, 1802 MatCreateSubMatrices_MPISBAIJ, 1803 MatIncreaseOverlap_MPISBAIJ, 1804 MatGetValues_MPISBAIJ, 1805 MatCopy_MPISBAIJ, 1806 /* 44*/ NULL, 1807 MatScale_MPISBAIJ, 1808 MatShift_MPISBAIJ, 1809 NULL, 1810 NULL, 1811 /* 49*/ NULL, 1812 NULL, 1813 NULL, 1814 NULL, 1815 NULL, 1816 /* 54*/ NULL, 1817 NULL, 1818 MatSetUnfactored_MPISBAIJ, 1819 NULL, 1820 MatSetValuesBlocked_MPISBAIJ, 1821 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1822 NULL, 1823 NULL, 1824 NULL, 1825 NULL, 1826 /* 64*/ NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 NULL, 1831 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1832 NULL, 1833 MatConvert_MPISBAIJ_Basic, 1834 NULL, 1835 NULL, 1836 /* 74*/ NULL, 1837 NULL, 1838 NULL, 1839 NULL, 1840 NULL, 1841 /* 79*/ NULL, 1842 NULL, 1843 NULL, 1844 NULL, 1845 MatLoad_MPISBAIJ, 1846 /* 84*/ NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 NULL, 1851 /* 89*/ NULL, 1852 NULL, 1853 NULL, 1854 NULL, 1855 NULL, 1856 /* 94*/ NULL, 1857 NULL, 1858 NULL, 1859 NULL, 1860 NULL, 1861 /* 99*/ NULL, 1862 NULL, 1863 NULL, 1864 MatConjugate_MPISBAIJ, 1865 NULL, 1866 /*104*/ NULL, 1867 MatRealPart_MPISBAIJ, 1868 MatImaginaryPart_MPISBAIJ, 1869 MatGetRowUpperTriangular_MPISBAIJ, 1870 MatRestoreRowUpperTriangular_MPISBAIJ, 1871 /*109*/ NULL, 1872 NULL, 1873 NULL, 1874 NULL, 1875 MatMissingDiagonal_MPISBAIJ, 1876 /*114*/ NULL, 1877 NULL, 1878 NULL, 1879 NULL, 1880 NULL, 1881 /*119*/ NULL, 1882 NULL, 1883 NULL, 1884 NULL, 1885 NULL, 1886 /*124*/ NULL, 1887 NULL, 1888 NULL, 1889 NULL, 
1890 NULL, 1891 /*129*/ NULL, 1892 NULL, 1893 NULL, 1894 NULL, 1895 NULL, 1896 /*134*/ NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 NULL, 1901 /*139*/ MatSetBlockSizes_Default, 1902 NULL, 1903 NULL, 1904 NULL, 1905 NULL, 1906 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1907 NULL, 1908 NULL, 1909 NULL, 1910 NULL, 1911 NULL, 1912 /*150*/ NULL, 1913 NULL}; 1914 1915 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1916 { 1917 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1918 PetscInt i, mbs, Mbs; 1919 PetscMPIInt size; 1920 1921 PetscFunctionBegin; 1922 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1923 PetscCall(PetscLayoutSetUp(B->rmap)); 1924 PetscCall(PetscLayoutSetUp(B->cmap)); 1925 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1926 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1927 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1928 1929 mbs = B->rmap->n / bs; 1930 Mbs = B->rmap->N / bs; 1931 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1932 1933 B->rmap->bs = bs; 1934 b->bs2 = bs * bs; 1935 b->mbs = mbs; 1936 b->Mbs = Mbs; 1937 b->nbs = B->cmap->n / bs; 1938 b->Nbs = B->cmap->N / bs; 1939 1940 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1941 b->rstartbs = B->rmap->rstart / bs; 1942 b->rendbs = B->rmap->rend / bs; 1943 1944 b->cstartbs = B->cmap->rstart / bs; 1945 b->cendbs = B->cmap->rend / bs; 1946 1947 #if defined(PETSC_USE_CTABLE) 1948 PetscCall(PetscHMapIDestroy(&b->colmap)); 1949 #else 1950 
PetscCall(PetscFree(b->colmap)); 1951 #endif 1952 PetscCall(PetscFree(b->garray)); 1953 PetscCall(VecDestroy(&b->lvec)); 1954 PetscCall(VecScatterDestroy(&b->Mvctx)); 1955 PetscCall(VecDestroy(&b->slvec0)); 1956 PetscCall(VecDestroy(&b->slvec0b)); 1957 PetscCall(VecDestroy(&b->slvec1)); 1958 PetscCall(VecDestroy(&b->slvec1a)); 1959 PetscCall(VecDestroy(&b->slvec1b)); 1960 PetscCall(VecScatterDestroy(&b->sMvctx)); 1961 1962 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1963 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1964 PetscCall(MatDestroy(&b->B)); 1965 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1966 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1967 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1968 1969 if (!B->preallocated) { 1970 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1971 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1972 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1973 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1974 } 1975 1976 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1977 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1978 1979 B->preallocated = PETSC_TRUE; 1980 B->was_assembled = PETSC_FALSE; 1981 B->assembled = PETSC_FALSE; 1982 PetscFunctionReturn(0); 1983 } 1984 1985 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1986 { 1987 PetscInt m, rstart, cend; 1988 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1989 const PetscInt *JJ = NULL; 1990 PetscScalar *values = NULL; 1991 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1992 PetscBool nooffprocentries; 1993 1994 PetscFunctionBegin; 1995 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size 
specified, must be positive but it is %" PetscInt_FMT, bs); 1996 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1997 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1998 PetscCall(PetscLayoutSetUp(B->rmap)); 1999 PetscCall(PetscLayoutSetUp(B->cmap)); 2000 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2001 m = B->rmap->n / bs; 2002 rstart = B->rmap->rstart / bs; 2003 cend = B->cmap->rend / bs; 2004 2005 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2006 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2007 for (i = 0; i < m; i++) { 2008 nz = ii[i + 1] - ii[i]; 2009 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2010 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2011 JJ = jj + ii[i]; 2012 bd = 0; 2013 for (j = 0; j < nz; j++) { 2014 if (*JJ >= i + rstart) break; 2015 JJ++; 2016 bd++; 2017 } 2018 d = 0; 2019 for (; j < nz; j++) { 2020 if (*JJ++ >= cend) break; 2021 d++; 2022 } 2023 d_nnz[i] = d; 2024 o_nnz[i] = nz - d - bd; 2025 nz = nz - bd; 2026 nz_max = PetscMax(nz_max, nz); 2027 } 2028 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2029 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2030 PetscCall(PetscFree2(d_nnz, o_nnz)); 2031 2032 values = (PetscScalar *)V; 2033 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2034 for (i = 0; i < m; i++) { 2035 PetscInt row = i + rstart; 2036 PetscInt ncols = ii[i + 1] - ii[i]; 2037 const PetscInt *icols = jj + ii[i]; 2038 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2039 const PetscScalar *svals = values + (V ? 
(bs * bs * ii[i]) : 0); 2040 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2041 } else { /* block ordering does not match so we can only insert one block at a time. */ 2042 PetscInt j; 2043 for (j = 0; j < ncols; j++) { 2044 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2045 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2046 } 2047 } 2048 } 2049 2050 if (!V) PetscCall(PetscFree(values)); 2051 nooffprocentries = B->nooffprocentries; 2052 B->nooffprocentries = PETSC_TRUE; 2053 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2054 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2055 B->nooffprocentries = nooffprocentries; 2056 2057 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /*MC 2062 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2063 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2064 the matrix is stored. 2065 2066 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2067 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2068 2069 Options Database Keys: 2070 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2071 2072 Note: 2073 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2074 diagonal portion of the matrix of each process has to less than or equal the number of columns. 
   Level: beginner

.seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
M*/

/* Type constructor for MATMPISBAIJ: allocates the implementation struct, installs the
   function table, registers the composed methods, and records the symmetry flags.
   Preallocation happens later via MatMPISBAIJSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  /* copy the shared static ops table, then override the per-type entries */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* register composed methods queried via PetscObjectQueryFunction()/PetscTryMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* SBAIJ is symmetric by construction; Hermitian only in the real case */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 are invalid; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
   and `MATMPISBAIJ` otherwise.

   Options Database Key:
. -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the in diagonal portion of the local
           (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or NULL.

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Notes:

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
@*/
/* Public entry point; dispatches to the type-specific implementation (if any) composed
   under "MatMPISBAIJSetPreallocation_C". A no-op for non-MPISBAIJ types. */
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the in diagonal portion of the local
           (possibly different for each block block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   Notes:
   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . .
d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/
/* Convenience constructor: picks MATSEQSBAIJ or MATMPISBAIJ from the communicator size
   and forwards the preallocation parameters. */
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* o_nz/o_nnz are irrelevant on one process: there is no off-diagonal part */
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

/* Deep-copies an assembled MPISBAIJ matrix: duplicates the local A (SEQSBAIJ) and
   B (SEQBAIJ) parts, copies the scalar bookkeeping, and rebuilds the slvec work
   vectors; the sMvctx scatter is shared by reference rather than copied. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a      = (Mat_MPISBAIJ *)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow() caches are not copied; rebuilt lazily */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  /* slvec1a/slvec1b (and slvec0b) alias the leading/trailing parts of slvec1 (slvec0) */
  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Loads the matrix from a viewer; only the binary viewer format is supported. */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/* Row-wise max absolute value. Because only the upper triangle is stored, each entry of the
   off-diagonal part B also contributes to the row of its (transposed) column owner; those
   contributions are accumulated in `work` and forwarded with point-to-point messages to
   higher-ranked owners. idx output is not implemented. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); /* contribution of the diagonal (SEQSBAIJ) part */
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B */
  if (rank != size - 1) { /* the last rank owns no columns to the right of its diagonal block */
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol; /* local col index */
          /* NOTE(review): assumes B's compacted columns all lie past this rank's diagonal
             block, so offsetting by rowners_bs[rank + 1] yields the global index — confirm */
          col += rowners_bs[rank + 1]; /* global col index */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}

/* SOR/SSOR relaxation for MPISBAIJ. Only bs == 1 is supported. The symmetric-structure
   trick: the lower triangle of the off-process coupling is applied through the transpose
   of B plus the sMvctx scatter (slvec0/slvec1 pack the local part and the B-column part
   into one vector). */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));

      PetscCall(VecScale(mat->slvec0, -1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1, &ptr));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(PetscArraycpy(ptr, b, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec1, &ptr));

      /* set slvec1b = 0 */
      PetscCall(VecSet(mat->slvec1b, 0.0));

      PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx, &x));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec                xx1;
    PetscBool          hasop;
    const PetscScalar *diag;
    PetscScalar       *sl, scale = (omega - 2.0) / omega;
    PetscInt           i, n;

    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb, &mat->xx1));
      PetscCall(VecDuplicate(bb, &mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));

    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
      PetscCall(VecAYPX(mat->slvec1a, scale, bb));
    } else {
      /*
        These two lines are replaced by code that may be a bit faster for a good compiler
        PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
        PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a, &sl));
      PetscCall(VecGetArrayRead(mat->diag, &diag));
      PetscCall(VecGetArrayRead(bb, &b));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(VecGetLocalSize(xx, &n));
      if (omega == 1.0) {
        for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
        PetscCall(PetscLogFlops(2.0 * n));
      } else {
        for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
        PetscCall(PetscLogFlops(3.0 * n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a, &sl));
      PetscCall(VecRestoreArrayRead(mat->diag, &diag));
      PetscCall(VecRestoreArrayRead(bb, &b));
      PetscCall(VecRestoreArray(xx, &x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(VecSet(mat->slvec1b, 0.0));
    PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0, &from));
    PetscCall(VecGetArray(xx, &x));
    PetscCall(PetscArraycpy(from, x, bs * mbs));
    PetscCall(VecRestoreArray(mat->slvec0, &from));
    PetscCall(VecRestoreArray(xx, &x));
    PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
   CSR format the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* CSR row offsets are 0-based, so the first offset must be exactly 0 */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  /* m is the local row count; PETSC_DECIDE (a negative value) is not allowed here
     because the CSR arrays are sized by the caller-provided local row count */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* Create an empty MPISBAIJ matrix of the requested sizes, then hand the CSR
     arrays to the preallocation routine, which copies them into PETSc's internal
     format (the caller's arrays are not referenced afterwards) */
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(0);
}

/*@C
  MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B - the matrix
. bs - the block size
. i - the indices into j for the start of each local row (starts with zero)
.
j - the column indices for each local row (starts with zero) these must be sorted for each row 2784 - v - optional values in the matrix 2785 2786 Level: advanced 2787 2788 Notes: 2789 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2790 and usually the numerical values as well 2791 2792 Any entries below the diagonal are ignored 2793 2794 .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2795 @*/ 2796 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2797 { 2798 PetscFunctionBegin; 2799 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2800 PetscFunctionReturn(0); 2801 } 2802 2803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2804 { 2805 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2806 PetscInt *indx; 2807 PetscScalar *values; 2808 2809 PetscFunctionBegin; 2810 PetscCall(MatGetSize(inmat, &m, &N)); 2811 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2812 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2813 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2814 PetscInt *bindx, rmax = a->rmax, j; 2815 PetscMPIInt rank, size; 2816 2817 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2818 mbs = m / bs; 2819 Nbs = N / cbs; 2820 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2821 nbs = n / cbs; 2822 2823 PetscCall(PetscMalloc1(rmax, &bindx)); 2824 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2825 2826 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2827 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2828 if (rank == size - 1) { 2829 /* Check sum(nbs) = Nbs */ 2830 PetscCheck(__end == Nbs, 
PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2831 } 2832 2833 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2834 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2835 for (i = 0; i < mbs; i++) { 2836 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2837 nnz = nnz / bs; 2838 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2839 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2840 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2841 } 2842 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2843 PetscCall(PetscFree(bindx)); 2844 2845 PetscCall(MatCreate(comm, outmat)); 2846 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2847 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2848 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2849 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2850 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2851 MatPreallocateEnd(dnz, onz); 2852 } 2853 2854 /* numeric phase */ 2855 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2856 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2857 2858 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2859 for (i = 0; i < m; i++) { 2860 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2861 Ii = i + rstart; 2862 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2863 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2864 } 2865 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2866 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2867 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2868 PetscFunctionReturn(0); 2869 } 2870