1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 PetscCall(MatRestoreRowUpperTriangular(A)); 141 } else { 142 PetscCall(MatSetUp(B)); 143 } 144 } else { 145 B = *newmat; 146 PetscCall(MatZeroEntries(B)); 147 } 148 149 PetscCall(MatGetRowUpperTriangular(A)); 150 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 151 PetscInt ncols; 152 const PetscInt *row; 153 const PetscScalar *vals; 154 155 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 156 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 157 #if defined(PETSC_USE_COMPLEX) 158 if (A->hermitian == PETSC_BOOL3_TRUE) { 159 PetscInt i; 160 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 161 } else { 162 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 163 } 164 #else 165 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 166 #endif 167 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 168 } 169 PetscCall(MatRestoreRowUpperTriangular(A)); 170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 172 173 if (reuse == MAT_INPLACE_MATRIX) { 174 PetscCall(MatHeaderReplace(A, &B)); 175 } else { 176 *newmat = B; 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 182 { 183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 184 185 PetscFunctionBegin; 186 PetscCall(MatStoreValues(aij->A)); 187 PetscCall(MatStoreValues(aij->B)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 192 { 193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 194 195 PetscFunctionBegin; 196 PetscCall(MatRetrieveValues(aij->A)); 197 PetscCall(MatRetrieveValues(aij->B)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 202 do { \ 203 brow = row / bs; \ 204 rp = aj + ai[brow]; \ 205 ap = aa + bs2 * ai[brow]; \ 206 rmax = aimax[brow]; \ 207 nrow = ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 a_noinsert:; \ 238 ailen[brow] = nrow; \ 239 } while (0) 240 241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 242 do { \ 243 brow = row / bs; \ 244 rp = bj + bi[brow]; \ 245 ap = ba + bs2 * bi[brow]; \ 246 rmax = bimax[brow]; \ 247 nrow = bilen[brow]; \ 248 bcol = col / bs; \ 249 ridx = row % bs; \ 250 cidx = col % bs; \ 251 low = 0; \ 252 high = nrow; \ 253 while (high - low > 3) { \ 254 t = (low + high) / 2; \ 255 if (rp[t] > bcol) high = t; \ 256 else low = t; \ 257 } \ 258 for (_i = low; _i < high; _i++) { \ 259 if (rp[_i] > bcol) break; \ 260 if (rp[_i] == bcol) { \ 261 bap = ap + bs2 * _i + bs * cidx + ridx; \ 262 if (addv == ADD_VALUES) *bap += value; \ 263 else *bap = value; \ 264 goto b_noinsert; \ 265 } \ 266 } \ 267 if (b->nonew == 1) goto b_noinsert; \ 268 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 269 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 270 N = nrow++ - 1; \ 271 /* shift up all the later entries in this row */ \ 272 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 273 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 274 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 275 rp[_i] = bcol; \ 276 ap[bs2 * _i + bs * cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } while (0) 280 281 /* Only add/insert a(i,j) with i<=j (blocks). 282 Any a(i,j) with i>j input by user is ignored or generates an error 283 */ 284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 285 { 286 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 287 MatScalar value; 288 PetscBool roworiented = baij->roworiented; 289 PetscInt i, j, row, col; 290 PetscInt rstart_orig = mat->rmap->rstart; 291 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 292 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 293 294 /* Some Variables required in the macro */ 295 Mat A = baij->A; 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 297 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 298 MatScalar *aa = a->a; 299 300 Mat B = baij->B; 301 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 302 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 303 MatScalar *ba = b->a; 304 305 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 306 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 307 MatScalar *ap, *bap; 308 309 /* for stash */ 310 PetscInt n_loc, *in_loc = NULL; 311 MatScalar *v_loc = NULL; 312 313 PetscFunctionBegin; 314 if (!baij->donotstash) { 315 if (n > baij->n_loc) { 316 PetscCall(PetscFree(baij->in_loc)); 317 PetscCall(PetscFree(baij->v_loc)); 318 PetscCall(PetscMalloc1(n, &baij->in_loc)); 319 PetscCall(PetscMalloc1(n, &baij->v_loc)); 320 321 baij->n_loc = n; 322 } 323 in_loc = baij->in_loc; 324 v_loc = baij->v_loc; 325 } 326 327 for (i = 0; i < m; i++) { 328 if (im[i] < 0) continue; 329 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 330 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 331 row = im[i] - rstart_orig; /* local row index */ 332 for (j = 0; j < n; j++) { 333 if (im[i] / bs > in[j] / bs) { 334 if (a->ignore_ltriangular) { 335 continue; /* ignore lower triangular blocks */ 336 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 337 } 338 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 339 col = in[j] - cstart_orig; /* local col index */ 340 brow = row / bs; 341 bcol = col / bs; 342 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 343 if (roworiented) value = v[i * n + j]; 344 else value = v[i + j * m]; 345 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 346 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 347 } else if (in[j] < 0) { 348 continue; 349 } else { 350 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 351 /* off-diag entry (B) */ 352 if (mat->was_assembled) { 353 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 354 #if defined(PETSC_USE_CTABLE) 355 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 356 col = col - 1; 357 #else 358 col = baij->colmap[in[j] / bs] - 1; 359 #endif 360 if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) { 361 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 362 col = in[j]; 363 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 364 B = baij->B; 365 b = (Mat_SeqBAIJ *)B->data; 366 bimax = b->imax; 367 bi = b->i; 368 bilen = b->ilen; 369 bj = b->j; 370 ba = b->a; 371 } else col += in[j] % bs; 372 } else col = in[j]; 373 if (roworiented) value = v[i * n + j]; 374 else value = v[i + j * m]; 375 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 376 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 377 } 378 } 379 } else { /* off processor entry */ 380 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 381 if (!baij->donotstash) { 382 mat->assembled = PETSC_FALSE; 383 n_loc = 0; 384 for (j = 0; j < n; j++) { 385 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 386 in_loc[n_loc] = in[j]; 387 if (roworiented) { 388 v_loc[n_loc] = v[i * n + j]; 389 } else { 390 v_loc[n_loc] = v[j * m + i]; 391 } 392 n_loc++; 393 } 394 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 395 } 396 } 397 } 398 PetscFunctionReturn(PETSC_SUCCESS); 399 } 400 401 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 402 { 403 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 404 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 405 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 406 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 407 PetscBool roworiented = a->roworiented; 408 const PetscScalar *value = v; 409 MatScalar *ap, *aa = a->a, *bap; 410 411 PetscFunctionBegin; 412 if (col < row) { 413 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 414 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 415 } 416 rp = aj + ai[row]; 417 ap = aa + bs2 * ai[row]; 418 rmax = imax[row]; 419 nrow = ailen[row]; 420 value = v; 421 low = 0; 422 high = nrow; 423 424 while (high - low > 7) { 425 t = (low + high) / 2; 426 if (rp[t] > col) high = t; 427 else low = t; 428 } 429 for (i = low; i < high; i++) { 430 if (rp[i] > col) break; 431 if (rp[i] == col) { 432 bap = ap + bs2 * i; 433 if (roworiented) { 434 if (is == ADD_VALUES) { 435 for (ii = 0; ii < bs; ii++) { 436 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 437 } 438 } else { 439 for (ii = 0; ii < bs; ii++) { 440 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 441 } 442 } 443 } else { 444 if (is == ADD_VALUES) { 445 for (ii = 0; ii < bs; ii++) { 446 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 447 } 448 } else { 449 for (ii = 0; ii < bs; ii++) { 450 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 451 } 452 } 453 } 454 goto noinsert2; 455 } 456 } 457 if (nonew == 1) goto noinsert2; 458 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 459 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 460 N = nrow++ - 1; 461 high++; 462 /* shift up all the later entries in this row */ 463 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 464 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 465 rp[i] = col; 466 bap = ap + bs2 * i; 467 if (roworiented) { 468 for (ii = 0; ii < bs; ii++) { 469 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 470 } 471 } else { 472 for (ii = 0; ii < bs; ii++) { 473 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 474 } 475 } 476 noinsert2:; 477 ailen[row] = nrow; 478 PetscFunctionReturn(PETSC_SUCCESS); 479 } 480 481 /* 482 This routine is exactly duplicated in mpibaij.c 483 */ 484 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 487 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 488 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 489 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 490 PetscBool roworiented = a->roworiented; 491 const PetscScalar *value = v; 492 MatScalar *ap, *aa = a->a, *bap; 493 494 PetscFunctionBegin; 495 rp = aj + ai[row]; 496 ap = aa + bs2 * ai[row]; 497 rmax = imax[row]; 498 nrow = ailen[row]; 499 low = 0; 500 high = nrow; 501 value = v; 502 while (high - low > 7) { 503 t = (low + high) / 2; 504 if (rp[t] > col) high = t; 505 else low = t; 506 } 507 for (i = low; i < high; i++) { 508 if (rp[i] > col) break; 509 if (rp[i] == col) { 510 bap = ap + bs2 * i; 511 if (roworiented) { 512 if (is == ADD_VALUES) { 513 for (ii = 0; ii < bs; ii++) { 514 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 515 } 516 } else { 517 for (ii = 0; ii < bs; ii++) { 518 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 519 } 520 } 521 } else { 522 if (is == ADD_VALUES) { 523 for (ii = 0; ii < bs; ii++, value += bs) { 524 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 525 bap += bs; 526 } 527 } else { 528 for (ii = 0; ii < bs; ii++, value += bs) { 529 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 530 bap += bs; 531 } 532 } 533 } 534 goto noinsert2; 535 } 536 } 537 if (nonew == 1) goto noinsert2; 538 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 539 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 540 N = nrow++ - 1; 541 high++; 542 /* shift up all the later entries in this row */ 543 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 544 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 545 rp[i] = col; 546 bap = ap + bs2 * i; 547 if (roworiented) { 548 for (ii = 0; ii < bs; ii++) { 549 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 550 } 551 } else { 552 for (ii = 0; ii < bs; ii++) { 553 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 554 } 555 } 556 noinsert2:; 557 ailen[row] = nrow; 558 PetscFunctionReturn(PETSC_SUCCESS); 559 } 560 561 /* 562 This routine could be optimized by removing the need for the block copy below and passing stride information 563 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 564 */ 565 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 566 { 567 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 568 const MatScalar *value; 569 MatScalar *barray = baij->barray; 570 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 571 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 572 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 573 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 574 575 PetscFunctionBegin; 576 if (!barray) { 577 PetscCall(PetscMalloc1(bs2, &barray)); 578 baij->barray = barray; 579 } 580 581 if (roworiented) { 582 stepval = (n - 1) * bs; 583 } else { 584 stepval = (m - 1) * bs; 585 } 586 for (i = 0; i < m; i++) { 587 if (im[i] < 0) continue; 588 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 589 if (im[i] >= rstart && im[i] < rend) { 590 row = im[i] - rstart; 591 for (j = 0; j < n; j++) { 592 if (im[i] > in[j]) { 593 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 594 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 595 } 596 /* If NumCol = 1 then a copy is not required */ 597 if ((roworiented) && (n == 1)) { 598 barray = (MatScalar *)v + i * bs2; 599 } else if ((!roworiented) && (m == 1)) { 600 barray = (MatScalar *)v + j * bs2; 601 } else { /* Here a copy is required */ 602 if (roworiented) { 603 value = v + i * (stepval + bs) * bs + j * bs; 604 } else { 605 value = v + j * (stepval + bs) * bs + i * bs; 606 } 607 for (ii = 0; ii < bs; ii++, value += stepval) { 608 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 609 } 610 barray -= bs2; 611 } 612 613 if (in[j] >= cstart && in[j] < cend) { 614 col = in[j] - cstart; 615 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 616 } else if (in[j] < 0) { 617 continue; 618 } else { 619 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 620 if (mat->was_assembled) { 621 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 622 623 #if defined(PETSC_USE_CTABLE) 624 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 625 col = col < 1 ? -1 : (col - 1) / bs; 626 #else 627 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 628 #endif 629 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 630 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 631 col = in[j]; 632 } 633 } else col = in[j]; 634 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 635 } 636 } 637 } else { 638 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 639 if (!baij->donotstash) { 640 if (roworiented) { 641 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 642 } else { 643 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 644 } 645 } 646 } 647 } 648 PetscFunctionReturn(PETSC_SUCCESS); 649 } 650 651 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 652 { 653 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 654 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 655 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 656 657 PetscFunctionBegin; 658 for (i = 0; i < m; i++) { 659 if (idxm[i] < 0) continue; /* negative row */ 660 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 661 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 662 row = idxm[i] - bsrstart; 663 for (j = 0; j < n; j++) { 664 if (idxn[j] < 0) continue; /* negative column */ 665 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 666 if (idxn[j] >= bscstart && idxn[j] < bscend) { 667 col = idxn[j] - bscstart; 668 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 669 } else { 670 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 671 #if defined(PETSC_USE_CTABLE) 672 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 673 data--; 674 #else 675 data = baij->colmap[idxn[j] / bs] - 1; 676 #endif 677 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 678 else { 679 col = data + idxn[j] % bs; 680 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 681 } 682 } 683 } 684 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 685 } 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 690 { 691 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 692 PetscReal sum[2], *lnorm2; 693 694 PetscFunctionBegin; 695 if (baij->size == 1) { 696 PetscCall(MatNorm(baij->A, type, norm)); 697 } else { 698 if (type == NORM_FROBENIUS) { 699 PetscCall(PetscMalloc1(2, &lnorm2)); 700 PetscCall(MatNorm(baij->A, type, lnorm2)); 701 *lnorm2 = (*lnorm2) * (*lnorm2); 702 lnorm2++; /* squar power of norm(A) */ 703 PetscCall(MatNorm(baij->B, type, lnorm2)); 704 *lnorm2 = (*lnorm2) * (*lnorm2); 705 lnorm2--; /* squar power of norm(B) */ 706 PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 707 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 708 PetscCall(PetscFree(lnorm2)); 709 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 710 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 711 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 712 PetscReal *rsum, *rsum2, vabs; 713 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 714 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 715 MatScalar *v; 716 PetscMPIInt iN; 717 718 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 719 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 720 /* Amat */ 721 v = amat->a; 722 jj = amat->j; 723 for (brow = 0; brow < mbs; brow++) { 724 grow = bs * (rstart + brow); 725 nz = amat->i[brow + 1] - amat->i[brow]; 726 for (bcol = 0; bcol < nz; bcol++) { 727 gcol = bs * (rstart + *jj); 728 jj++; 729 for (col = 0; col < bs; col++) { 730 for (row = 0; row < bs; row++) { 731 vabs = PetscAbsScalar(*v); 732 v++; 733 rsum[gcol + col] += vabs; 734 /* non-diagonal block */ 735 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 736 } 737 } 738 } 739 PetscCall(PetscLogFlops(nz * bs * bs)); 740 } 741 /* Bmat */ 742 v = bmat->a; 743 jj = bmat->j; 744 for (brow = 0; brow < mbs; brow++) { 745 grow = bs * (rstart + brow); 746 nz = bmat->i[brow + 1] - bmat->i[brow]; 747 for (bcol = 0; bcol < nz; bcol++) { 748 gcol = bs * garray[*jj]; 749 jj++; 750 for (col = 0; col < bs; col++) { 751 for (row = 0; row < bs; row++) { 752 vabs = PetscAbsScalar(*v); 753 v++; 754 rsum[gcol + col] += vabs; 755 rsum[grow + row] += vabs; 756 } 757 } 758 } 759 PetscCall(PetscLogFlops(nz * bs * bs)); 760 } 761 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 762 PetscCallMPI(MPIU_Allreduce(rsum, rsum2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 763 *norm = 0.0; 764 for (col = 0; col < mat->cmap->N; col++) { 765 if (rsum2[col] > *norm) *norm = rsum2[col]; 766 } 767 PetscCall(PetscFree2(rsum, rsum2)); 768 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 769 } 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 774 { 775 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 776 PetscInt nstash, reallocs; 777 778 PetscFunctionBegin; 779 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 780 781 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 782 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 783 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 784 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 785 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 786 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 791 { 792 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 793 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 794 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 795 PetscInt *row, *col; 796 PetscBool other_disassembled; 797 PetscMPIInt n; 798 PetscBool r1, r2, r3; 799 MatScalar *val; 800 801 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 802 PetscFunctionBegin; 803 if (!baij->donotstash && !mat->nooffprocentries) { 804 while (1) { 805 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 806 if (!flg) break; 807 808 for (i = 0; i < n;) { 809 /* Now identify the consecutive vals belonging to the same row */ 810 for (j = i, rstart = row[j]; j < n; j++) { 811 if (row[j] != rstart) break; 812 } 813 if (j < n) ncols = j - i; 814 else ncols = n - i; 815 /* Now assemble all these values with a single function call */ 816 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 817 i = j; 818 } 819 } 820 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 821 /* Now process the block-stash. Since the values are stashed column-oriented, 822 set the row-oriented flag to column-oriented, and after MatSetValues() 823 restore the original flags */ 824 r1 = baij->roworiented; 825 r2 = a->roworiented; 826 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 827 828 baij->roworiented = PETSC_FALSE; 829 a->roworiented = PETSC_FALSE; 830 831 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 832 while (1) { 833 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 834 if (!flg) break; 835 836 for (i = 0; i < n;) { 837 /* Now identify the consecutive vals belonging to the same row */ 838 for (j = i, rstart = row[j]; j < n; j++) { 839 if (row[j] != rstart) break; 840 } 841 if (j < n) ncols = j - i; 842 else ncols = n - i; 843 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 844 i = j; 845 } 846 } 847 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 848 849 baij->roworiented = r1; 850 a->roworiented = r2; 851 852 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 853 } 854 855 PetscCall(MatAssemblyBegin(baij->A, mode)); 856 PetscCall(MatAssemblyEnd(baij->A, mode)); 857 858 /* determine if any processor has disassembled, if so we must 859 also disassemble ourselves, in order that we may reassemble. */ 860 /* 861 if nonzero structure of submatrix B cannot change then we know that 862 no processor disassembled thus we can skip this stuff 863 */ 864 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 865 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 866 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 867 } 868 869 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 870 PetscCall(MatAssemblyBegin(baij->B, mode)); 871 PetscCall(MatAssemblyEnd(baij->B, mode)); 872 873 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 874 875 baij->rowvalues = NULL; 876 877 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 878 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 879 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 880 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 881 } 882 PetscFunctionReturn(PETSC_SUCCESS); 883 } 884 885 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 886 #include <petscdraw.h> 887 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 888 { 889 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 890 PetscInt bs = mat->rmap->bs; 891 PetscMPIInt rank = baij->rank; 892 PetscBool iascii, isdraw; 893 PetscViewer sviewer; 894 PetscViewerFormat format; 895 896 PetscFunctionBegin; 897 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 898 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 899 if (iascii) { 900 PetscCall(PetscViewerGetFormat(viewer, &format)); 901 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 902 MatInfo info; 903 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 904 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 905 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 906 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 907 mat->rmap->bs, (double)info.memory)); 908 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 909 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 910 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 911 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 912 PetscCall(PetscViewerFlush(viewer)); 913 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 914 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 915 PetscCall(VecScatterView(baij->Mvctx, viewer)); 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } else if (format == PETSC_VIEWER_ASCII_INFO) { 918 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 919 PetscFunctionReturn(PETSC_SUCCESS); 920 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 } 924 925 if (isdraw) { 926 PetscDraw draw; 927 PetscBool isnull; 928 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 929 PetscCall(PetscDrawIsNull(draw, &isnull)); 930 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 931 } 932 933 { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqSBAIJ *Aloc; 937 Mat_SeqBAIJ *Bloc; 938 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 939 MatScalar *a; 940 const char *matname; 941 942 /* Should this be the same type as mat? */ 943 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 944 if (rank == 0) { 945 PetscCall(MatSetSizes(A, M, N, M, N)); 946 } else { 947 PetscCall(MatSetSizes(A, 0, 0, M, N)); 948 } 949 PetscCall(MatSetType(A, MATMPISBAIJ)); 950 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 951 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 955 ai = Aloc->i; 956 aj = Aloc->j; 957 a = Aloc->a; 958 PetscCall(PetscMalloc1(bs, &rvals)); 959 960 for (i = 0; i < mbs; i++) { 961 rvals[0] = bs * (baij->rstartbs + i); 962 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 963 for (j = ai[i]; j < ai[i + 1]; j++) { 964 col = (baij->cstartbs + aj[j]) * bs; 965 for (k = 0; k < bs; k++) { 966 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 967 col++; 968 a += bs; 969 } 970 } 971 } 972 /* copy over the B part */ 973 Bloc = (Mat_SeqBAIJ *)baij->B->data; 974 ai = Bloc->i; 975 aj = Bloc->j; 976 a = Bloc->a; 977 for (i = 0; i < mbs; i++) { 978 rvals[0] = bs * (baij->rstartbs + i); 979 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 980 for (j = ai[i]; j < ai[i + 1]; j++) { 981 col = baij->garray[aj[j]] * bs; 982 for (k = 0; k < bs; k++) { 983 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 984 col++; 985 a += bs; 986 } 987 } 988 } 989 PetscCall(PetscFree(rvals)); 990 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 991 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 992 /* 993 Everyone has to call to draw the matrix since the graphics waits are 994 synchronized across all processors that share the PetscDraw object 995 */ 996 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 997 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 998 if (rank == 0) { 999 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname)); 1000 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer)); 1001 } 1002 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1003 PetscCall(MatDestroy(&A)); 1004 } 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1009 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1010 1011 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1012 { 1013 PetscBool iascii, isdraw, issocket, isbinary; 1014 1015 PetscFunctionBegin; 1016 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1020 if (iascii || isdraw || issocket) { 1021 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 1022 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 1023 PetscFunctionReturn(PETSC_SUCCESS); 1024 } 1025 1026 #if defined(PETSC_USE_COMPLEX) 1027 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1028 { 1029 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1030 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1031 PetscScalar *from; 1032 const PetscScalar *x; 1033 1034 PetscFunctionBegin; 1035 /* diagonal part */ 1036 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1037 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1038 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1039 PetscCall(VecZeroEntries(a->slvec1b)); 1040 1041 /* subdiagonal part */ 1042 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1043 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1044 1045 /* copy x into the vec slvec0 */ 1046 PetscCall(VecGetArray(a->slvec0, &from)); 1047 PetscCall(VecGetArrayRead(xx, &x)); 1048 1049 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1050 PetscCall(VecRestoreArray(a->slvec0, &from)); 1051 PetscCall(VecRestoreArrayRead(xx, &x)); 1052 1053 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1054 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1055 /* supperdiagonal part */ 1056 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1057 PetscFunctionReturn(PETSC_SUCCESS); 1058 } 1059 #endif 1060 1061 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1062 { 1063 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1064 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1065 PetscScalar *from; 1066 const PetscScalar *x; 1067 1068 PetscFunctionBegin; 1069 /* diagonal part */ 1070 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1071 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1072 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1073 PetscCall(VecZeroEntries(a->slvec1b)); 1074 1075 /* subdiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1077 1078 /* copy x into the vec slvec0 */ 1079 PetscCall(VecGetArray(a->slvec0, &from)); 1080 PetscCall(VecGetArrayRead(xx, &x)); 1081 1082 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1083 PetscCall(VecRestoreArray(a->slvec0, &from)); 1084 PetscCall(VecRestoreArrayRead(xx, &x)); 1085 1086 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1087 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1088 /* supperdiagonal part */ 1089 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 #if PetscDefined(USE_COMPLEX) 1094 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1095 { 1096 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1097 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1098 PetscScalar *from; 1099 const PetscScalar *x; 1100 1101 PetscFunctionBegin; 1102 /* diagonal part */ 1103 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1104 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1105 PetscCall(VecZeroEntries(a->slvec1b)); 1106 1107 /* subdiagonal part */ 1108 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1109 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1110 1111 /* copy x into the vec slvec0 */ 1112 PetscCall(VecGetArray(a->slvec0, &from)); 1113 PetscCall(VecGetArrayRead(xx, &x)); 1114 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1115 PetscCall(VecRestoreArray(a->slvec0, &from)); 1116 1117 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1118 PetscCall(VecRestoreArrayRead(xx, &x)); 1119 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1120 1121 /* supperdiagonal part */ 1122 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1123 PetscFunctionReturn(PETSC_SUCCESS); 1124 } 1125 #endif 1126 1127 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1128 { 1129 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1130 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1131 PetscScalar *from; 1132 const PetscScalar *x; 1133 1134 PetscFunctionBegin; 1135 /* diagonal part */ 1136 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1137 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1138 PetscCall(VecZeroEntries(a->slvec1b)); 1139 1140 /* subdiagonal part */ 1141 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1142 1143 /* copy x into the vec slvec0 */ 1144 PetscCall(VecGetArray(a->slvec0, &from)); 1145 PetscCall(VecGetArrayRead(xx, &x)); 1146 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1147 PetscCall(VecRestoreArray(a->slvec0, &from)); 1148 1149 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1150 PetscCall(VecRestoreArrayRead(xx, &x)); 1151 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1152 1153 /* supperdiagonal part */ 1154 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 /* 1159 This only works correctly for square matrices where the subblock A->A is the 1160 diagonal block 1161 */ 1162 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1163 { 1164 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1165 1166 PetscFunctionBegin; 1167 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1168 PetscCall(MatGetDiagonal(a->A, v)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1173 { 1174 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1175 1176 PetscFunctionBegin; 1177 PetscCall(MatScale(a->A, aa)); 1178 PetscCall(MatScale(a->B, aa)); 1179 PetscFunctionReturn(PETSC_SUCCESS); 1180 } 1181 1182 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1183 { 1184 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1185 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1186 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1187 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1188 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1189 1190 PetscFunctionBegin; 1191 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1192 mat->getrowactive = PETSC_TRUE; 1193 1194 if (!mat->rowvalues && (idx || v)) { 1195 /* 1196 allocate enough space to hold information from the longest row. 1197 */ 1198 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1199 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1200 PetscInt max = 1, mbs = mat->mbs, tmp; 1201 for (i = 0; i < mbs; i++) { 1202 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1203 if (max < tmp) max = tmp; 1204 } 1205 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1206 } 1207 1208 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1209 lrow = row - brstart; /* local row index */ 1210 1211 pvA = &vworkA; 1212 pcA = &cworkA; 1213 pvB = &vworkB; 1214 pcB = &cworkB; 1215 if (!v) { 1216 pvA = NULL; 1217 pvB = NULL; 1218 } 1219 if (!idx) { 1220 pcA = NULL; 1221 if (!v) pcB = NULL; 1222 } 1223 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1224 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1225 nztot = nzA + nzB; 1226 1227 cmap = mat->garray; 1228 if (v || idx) { 1229 if (nztot) { 1230 /* Sort by increasing column numbers, assuming A and B already sorted */ 1231 PetscInt imark = -1; 1232 if (v) { 1233 *v = v_p = mat->rowvalues; 1234 for (i = 0; i < nzB; i++) { 1235 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1236 else break; 1237 } 1238 imark = i; 1239 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1240 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1241 } 1242 if (idx) { 1243 *idx = idx_p = mat->rowindices; 1244 if (imark > -1) { 1245 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1246 } else { 1247 for (i = 0; i < nzB; i++) { 1248 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1249 else break; 1250 } 1251 imark = i; 1252 } 1253 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1254 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1255 } 1256 } else { 1257 if (idx) *idx = NULL; 1258 if (v) *v = NULL; 1259 } 1260 } 1261 *nz = nztot; 1262 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1263 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 1267 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1268 { 1269 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1270 1271 PetscFunctionBegin; 1272 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1273 baij->getrowactive = PETSC_FALSE; 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1278 { 1279 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1280 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1281 1282 PetscFunctionBegin; 1283 aA->getrow_utriangular = PETSC_TRUE; 1284 PetscFunctionReturn(PETSC_SUCCESS); 1285 } 1286 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1287 { 1288 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1289 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1290 1291 PetscFunctionBegin; 1292 aA->getrow_utriangular = PETSC_FALSE; 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1297 { 1298 PetscFunctionBegin; 1299 if (PetscDefined(USE_COMPLEX)) { 1300 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1301 1302 PetscCall(MatConjugate(a->A)); 1303 PetscCall(MatConjugate(a->B)); 1304 } 1305 PetscFunctionReturn(PETSC_SUCCESS); 1306 } 1307 1308 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1309 { 1310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1311 1312 PetscFunctionBegin; 1313 PetscCall(MatRealPart(a->A)); 1314 PetscCall(MatRealPart(a->B)); 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 1318 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1319 { 1320 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1321 1322 PetscFunctionBegin; 1323 PetscCall(MatImaginaryPart(a->A)); 1324 PetscCall(MatImaginaryPart(a->B)); 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1329 Input: isrow - distributed(parallel), 1330 iscol_local - locally owned (seq) 1331 */ 1332 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1333 { 1334 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1335 const PetscInt *ptr1, *ptr2; 1336 1337 PetscFunctionBegin; 1338 *flg = PETSC_FALSE; 1339 PetscCall(ISGetLocalSize(isrow, &sz1)); 1340 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1341 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); 1342 1343 PetscCall(ISGetIndices(isrow, &ptr1)); 1344 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1345 1346 PetscCall(PetscMalloc1(sz1, &a1)); 1347 PetscCall(PetscMalloc1(sz2, &a2)); 1348 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1349 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1350 PetscCall(PetscSortInt(sz1, a1)); 1351 PetscCall(PetscSortInt(sz2, a2)); 1352 1353 nmatch = 0; 1354 k = 0; 1355 for (i = 0; i < sz1; i++) { 1356 for (j = k; j < sz2; j++) { 1357 if (a1[i] == a2[j]) { 1358 k = j; 1359 nmatch++; 1360 break; 1361 } 1362 } 1363 } 1364 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1365 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1366 PetscCall(PetscFree(a1)); 1367 PetscCall(PetscFree(a2)); 1368 if (nmatch < sz1) { 1369 *flg = PETSC_FALSE; 1370 } else { 1371 *flg = PETSC_TRUE; 1372 } 1373 PetscFunctionReturn(PETSC_SUCCESS); 1374 } 1375 1376 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1377 { 1378 Mat C[2]; 1379 IS iscol_local, isrow_local; 1380 PetscInt csize, csize_local, rsize; 1381 PetscBool isequal, issorted, isidentity = PETSC_FALSE; 1382 1383 PetscFunctionBegin; 1384 PetscCall(ISGetLocalSize(iscol, &csize)); 1385 PetscCall(ISGetLocalSize(isrow, &rsize)); 1386 if (call == MAT_REUSE_MATRIX) { 1387 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1388 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1389 } else { 1390 PetscCall(ISAllGather(iscol, &iscol_local)); 1391 PetscCall(ISSorted(iscol_local, &issorted)); 1392 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted"); 1393 } 1394 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1395 if (!isequal) { 1396 PetscCall(ISGetLocalSize(iscol_local, &csize_local)); 1397 isidentity = (PetscBool)(mat->cmap->N == csize_local); 1398 if (!isidentity) { 1399 if (call == MAT_REUSE_MATRIX) { 1400 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local)); 1401 PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1402 } else { 1403 PetscCall(ISAllGather(isrow, &isrow_local)); 1404 PetscCall(ISSorted(isrow_local, &issorted)); 1405 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted"); 1406 } 1407 } 1408 } 1409 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1410 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity))); 1411 if (!isequal && !isidentity) { 1412 if (call == MAT_INITIAL_MATRIX) { 1413 IS intersect; 1414 PetscInt ni; 1415 1416 PetscCall(ISIntersect(isrow_local, iscol_local, &intersect)); 1417 PetscCall(ISGetLocalSize(intersect, &ni)); 1418 PetscCall(ISDestroy(&intersect)); 1419 PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni); 1420 } 1421 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE)); 1422 PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1)); 1423 PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN)); 1424 if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1425 else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat)); 1426 else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1427 PetscCall(MatDestroy(C)); 1428 PetscCall(MatDestroy(C + 1)); 1429 } 1430 if (call == MAT_INITIAL_MATRIX) { 1431 if (!isequal && !isidentity) { 1432 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local)); 1433 PetscCall(ISDestroy(&isrow_local)); 1434 } 1435 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1436 PetscCall(ISDestroy(&iscol_local)); 1437 } 1438 PetscFunctionReturn(PETSC_SUCCESS); 1439 } 1440 1441 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1442 { 1443 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1444 1445 PetscFunctionBegin; 1446 PetscCall(MatZeroEntries(l->A)); 1447 PetscCall(MatZeroEntries(l->B)); 1448 PetscFunctionReturn(PETSC_SUCCESS); 1449 } 1450 1451 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1452 { 1453 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1454 Mat A = a->A, B = a->B; 1455 PetscLogDouble isend[5], irecv[5]; 1456 1457 PetscFunctionBegin; 1458 info->block_size = (PetscReal)matin->rmap->bs; 1459 1460 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1461 1462 isend[0] = info->nz_used; 1463 isend[1] = info->nz_allocated; 1464 isend[2] = info->nz_unneeded; 1465 isend[3] = info->memory; 1466 isend[4] = info->mallocs; 1467 1468 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1469 1470 isend[0] += info->nz_used; 1471 isend[1] += info->nz_allocated; 1472 isend[2] += info->nz_unneeded; 1473 isend[3] += info->memory; 1474 isend[4] += info->mallocs; 1475 if (flag == MAT_LOCAL) { 1476 info->nz_used = isend[0]; 1477 info->nz_allocated = isend[1]; 1478 info->nz_unneeded = isend[2]; 1479 info->memory = isend[3]; 1480 info->mallocs = isend[4]; 1481 } else if (flag == MAT_GLOBAL_MAX) { 1482 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1483 1484 info->nz_used = irecv[0]; 1485 info->nz_allocated = irecv[1]; 1486 info->nz_unneeded = irecv[2]; 1487 info->memory = irecv[3]; 1488 info->mallocs = irecv[4]; 1489 } else if (flag == MAT_GLOBAL_SUM) { 1490 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1491 1492 info->nz_used = irecv[0]; 1493 info->nz_allocated = irecv[1]; 1494 info->nz_unneeded = irecv[2]; 1495 info->memory = irecv[3]; 1496 info->mallocs = irecv[4]; 1497 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1498 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1499 info->fill_ratio_needed = 0; 1500 info->factor_mallocs = 0; 1501 PetscFunctionReturn(PETSC_SUCCESS); 1502 } 1503 1504 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1505 { 1506 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1507 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1508 1509 PetscFunctionBegin; 1510 switch (op) { 1511 case MAT_NEW_NONZERO_LOCATIONS: 1512 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1513 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1514 case MAT_KEEP_NONZERO_PATTERN: 1515 case MAT_NEW_NONZERO_LOCATION_ERR: 1516 MatCheckPreallocated(A, 1); 1517 PetscCall(MatSetOption(a->A, op, flg)); 1518 PetscCall(MatSetOption(a->B, op, flg)); 1519 break; 1520 case MAT_ROW_ORIENTED: 1521 MatCheckPreallocated(A, 1); 1522 a->roworiented = flg; 1523 1524 PetscCall(MatSetOption(a->A, op, flg)); 1525 PetscCall(MatSetOption(a->B, op, flg)); 1526 break; 1527 case MAT_FORCE_DIAGONAL_ENTRIES: 1528 case MAT_SORTED_FULL: 1529 case MAT_SUBMAT_SINGLEIS: 1530 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1531 break; 1532 case MAT_IGNORE_OFF_PROC_ENTRIES: 1533 a->donotstash = flg; 1534 break; 1535 case MAT_USE_HASH_TABLE: 1536 a->ht_flag = flg; 1537 break; 1538 case MAT_HERMITIAN: 1539 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1540 #if defined(PETSC_USE_COMPLEX) 1541 if (flg) { /* need different mat-vec ops */ 1542 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1543 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1544 A->ops->multtranspose = NULL; 1545 A->ops->multtransposeadd = NULL; 1546 A->symmetric = PETSC_BOOL3_FALSE; 1547 } 1548 #endif 1549 break; 1550 case MAT_SPD: 1551 case MAT_SYMMETRIC: 1552 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1553 #if defined(PETSC_USE_COMPLEX) 1554 if (flg) { /* restore to use default mat-vec ops */ 1555 A->ops->mult = MatMult_MPISBAIJ; 1556 A->ops->multadd = MatMultAdd_MPISBAIJ; 1557 A->ops->multtranspose = MatMult_MPISBAIJ; 1558 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1559 } 1560 #endif 1561 break; 1562 case MAT_STRUCTURALLY_SYMMETRIC: 1563 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1564 break; 1565 case MAT_SYMMETRY_ETERNAL: 1566 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1567 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1568 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1569 break; 1570 case MAT_SPD_ETERNAL: 1571 break; 1572 case MAT_IGNORE_LOWER_TRIANGULAR: 1573 case MAT_ERROR_LOWER_TRIANGULAR: 1574 aA->ignore_ltriangular = flg; 1575 break; 1576 case MAT_GETROW_UPPERTRIANGULAR: 1577 aA->getrow_utriangular = flg; 1578 break; 1579 default: 1580 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1581 } 1582 PetscFunctionReturn(PETSC_SUCCESS); 1583 } 1584 1585 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1586 { 1587 PetscFunctionBegin; 1588 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1589 if (reuse == MAT_INITIAL_MATRIX) { 1590 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1591 } else if (reuse == MAT_REUSE_MATRIX) { 1592 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1593 } 1594 PetscFunctionReturn(PETSC_SUCCESS); 1595 } 1596 1597 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1598 { 1599 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1600 Mat a = baij->A, b = baij->B; 1601 PetscInt nv, m, n; 1602 PetscBool flg; 1603 1604 PetscFunctionBegin; 1605 if (ll != rr) { 1606 PetscCall(VecEqual(ll, rr, &flg)); 1607 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1608 } 1609 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1610 1611 PetscCall(MatGetLocalSize(mat, &m, &n)); 1612 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1613 1614 PetscCall(VecGetLocalSize(rr, &nv)); 1615 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1616 1617 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1618 1619 /* left diagonalscale the off-diagonal part */ 1620 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1621 1622 /* scale the diagonal part */ 1623 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1624 1625 /* right diagonalscale the off-diagonal part */ 1626 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1627 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1628 PetscFunctionReturn(PETSC_SUCCESS); 1629 } 1630 1631 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1632 { 1633 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1634 1635 PetscFunctionBegin; 1636 PetscCall(MatSetUnfactored(a->A)); 1637 PetscFunctionReturn(PETSC_SUCCESS); 1638 } 1639 1640 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1641 1642 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1643 { 1644 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1645 Mat a, b, c, d; 1646 PetscBool flg; 1647 1648 PetscFunctionBegin; 1649 a = matA->A; 1650 b = matA->B; 1651 c = matB->A; 1652 d = matB->B; 1653 1654 PetscCall(MatEqual(a, c, &flg)); 1655 if (flg) PetscCall(MatEqual(b, d, &flg)); 1656 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1657 PetscFunctionReturn(PETSC_SUCCESS); 1658 } 1659 1660 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1661 { 1662 PetscBool isbaij; 1663 1664 PetscFunctionBegin; 1665 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1666 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1667 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1668 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1669 PetscCall(MatGetRowUpperTriangular(A)); 1670 PetscCall(MatCopy_Basic(A, B, str)); 1671 PetscCall(MatRestoreRowUpperTriangular(A)); 1672 } else { 1673 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1674 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1675 1676 PetscCall(MatCopy(a->A, b->A, str)); 1677 PetscCall(MatCopy(a->B, b->B, str)); 1678 } 1679 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1680 PetscFunctionReturn(PETSC_SUCCESS); 1681 } 1682 1683 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1684 { 1685 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1686 PetscBLASInt bnz, one = 1; 1687 Mat_SeqSBAIJ *xa, *ya; 1688 Mat_SeqBAIJ *xb, *yb; 1689 1690 PetscFunctionBegin; 1691 if (str == SAME_NONZERO_PATTERN) { 1692 PetscScalar alpha = a; 1693 xa = (Mat_SeqSBAIJ *)xx->A->data; 1694 ya = (Mat_SeqSBAIJ *)yy->A->data; 1695 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1696 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1697 xb = (Mat_SeqBAIJ *)xx->B->data; 1698 yb = (Mat_SeqBAIJ *)yy->B->data; 1699 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1700 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1701 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1702 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1703 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1704 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1705 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1706 } else { 1707 Mat B; 1708 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1709 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1710 PetscCall(MatGetRowUpperTriangular(X)); 1711 PetscCall(MatGetRowUpperTriangular(Y)); 1712 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1713 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1714 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1715 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1716 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1717 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1718 PetscCall(MatSetType(B, MATMPISBAIJ)); 1719 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1720 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1721 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1722 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1723 PetscCall(MatHeaderMerge(Y, &B)); 1724 PetscCall(PetscFree(nnz_d)); 1725 PetscCall(PetscFree(nnz_o)); 1726 PetscCall(MatRestoreRowUpperTriangular(X)); 1727 PetscCall(MatRestoreRowUpperTriangular(Y)); 1728 } 1729 PetscFunctionReturn(PETSC_SUCCESS); 1730 } 1731 1732 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1733 { 1734 PetscInt i; 1735 PetscBool flg; 1736 1737 PetscFunctionBegin; 1738 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1739 for (i = 0; i < n; i++) { 1740 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1741 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1742 } 1743 PetscFunctionReturn(PETSC_SUCCESS); 1744 } 1745 1746 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1747 { 1748 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1749 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1750 1751 PetscFunctionBegin; 1752 if (!Y->preallocated) { 1753 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1754 } else if (!aij->nz) { 1755 PetscInt nonew = aij->nonew; 1756 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1757 aij->nonew = nonew; 1758 } 1759 PetscCall(MatShift_Basic(Y, a)); 1760 PetscFunctionReturn(PETSC_SUCCESS); 1761 } 1762 1763 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1764 { 1765 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1766 1767 PetscFunctionBegin; 1768 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1769 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1770 if (d) { 1771 PetscInt rstart; 1772 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1773 *d += rstart / A->rmap->bs; 1774 } 1775 PetscFunctionReturn(PETSC_SUCCESS); 1776 } 1777 1778 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1779 { 1780 PetscFunctionBegin; 1781 *a = ((Mat_MPISBAIJ *)A->data)->A; 1782 PetscFunctionReturn(PETSC_SUCCESS); 1783 } 1784 1785 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1786 { 1787 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1788 1789 PetscFunctionBegin; 1790 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1791 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1792 PetscFunctionReturn(PETSC_SUCCESS); 1793 } 1794 1795 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1796 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1797 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1798 1799 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1800 MatGetRow_MPISBAIJ, 1801 MatRestoreRow_MPISBAIJ, 1802 MatMult_MPISBAIJ, 1803 /* 4*/ MatMultAdd_MPISBAIJ, 1804 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1805 MatMultAdd_MPISBAIJ, 1806 NULL, 1807 NULL, 1808 NULL, 1809 /* 10*/ NULL, 1810 NULL, 1811 NULL, 1812 MatSOR_MPISBAIJ, 1813 MatTranspose_MPISBAIJ, 1814 /* 15*/ MatGetInfo_MPISBAIJ, 1815 MatEqual_MPISBAIJ, 1816 MatGetDiagonal_MPISBAIJ, 1817 MatDiagonalScale_MPISBAIJ, 1818 MatNorm_MPISBAIJ, 1819 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1820 MatAssemblyEnd_MPISBAIJ, 1821 MatSetOption_MPISBAIJ, 1822 MatZeroEntries_MPISBAIJ, 1823 /* 24*/ NULL, 1824 NULL, 1825 NULL, 1826 NULL, 1827 NULL, 1828 /* 29*/ MatSetUp_MPI_Hash, 1829 NULL, 1830 NULL, 1831 MatGetDiagonalBlock_MPISBAIJ, 1832 NULL, 1833 /* 34*/ MatDuplicate_MPISBAIJ, 1834 NULL, 1835 NULL, 1836 NULL, 1837 NULL, 1838 /* 39*/ MatAXPY_MPISBAIJ, 1839 MatCreateSubMatrices_MPISBAIJ, 1840 MatIncreaseOverlap_MPISBAIJ, 1841 MatGetValues_MPISBAIJ, 1842 MatCopy_MPISBAIJ, 1843 /* 44*/ NULL, 1844 MatScale_MPISBAIJ, 1845 MatShift_MPISBAIJ, 1846 NULL, 1847 NULL, 1848 /* 49*/ NULL, 1849 NULL, 1850 NULL, 1851 NULL, 1852 NULL, 1853 /* 54*/ NULL, 1854 NULL, 1855 MatSetUnfactored_MPISBAIJ, 1856 NULL, 1857 MatSetValuesBlocked_MPISBAIJ, 1858 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1859 NULL, 1860 NULL, 1861 NULL, 1862 NULL, 1863 /* 64*/ NULL, 1864 NULL, 1865 NULL, 1866 NULL, 1867 NULL, 1868 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1869 NULL, 1870 MatConvert_MPISBAIJ_Basic, 1871 NULL, 1872 NULL, 1873 /* 74*/ NULL, 1874 NULL, 1875 NULL, 1876 NULL, 1877 NULL, 1878 /* 79*/ NULL, 1879 NULL, 1880 NULL, 1881 NULL, 1882 MatLoad_MPISBAIJ, 1883 /* 84*/ NULL, 1884 NULL, 1885 NULL, 1886 NULL, 1887 NULL, 1888 /* 89*/ NULL, 1889 NULL, 1890 NULL, 1891 NULL, 1892 NULL, 1893 /* 94*/ NULL, 1894 NULL, 1895 NULL, 1896 NULL, 1897 NULL, 1898 /* 99*/ NULL, 1899 NULL, 1900 NULL, 1901 MatConjugate_MPISBAIJ, 1902 NULL, 1903 /*104*/ NULL, 1904 MatRealPart_MPISBAIJ, 1905 MatImaginaryPart_MPISBAIJ, 1906 MatGetRowUpperTriangular_MPISBAIJ, 1907 MatRestoreRowUpperTriangular_MPISBAIJ, 1908 /*109*/ NULL, 1909 NULL, 1910 NULL, 1911 NULL, 1912 MatMissingDiagonal_MPISBAIJ, 1913 /*114*/ NULL, 1914 NULL, 1915 NULL, 1916 NULL, 1917 NULL, 1918 /*119*/ NULL, 1919 NULL, 1920 NULL, 1921 NULL, 1922 NULL, 1923 /*124*/ NULL, 1924 NULL, 1925 NULL, 1926 NULL, 1927 NULL, 1928 /*129*/ NULL, 1929 NULL, 1930 NULL, 1931 NULL, 1932 NULL, 1933 /*134*/ NULL, 1934 NULL, 1935 NULL, 1936 NULL, 1937 NULL, 1938 /*139*/ MatSetBlockSizes_Default, 1939 NULL, 1940 NULL, 1941 NULL, 1942 NULL, 1943 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1944 NULL, 1945 NULL, 1946 NULL, 1947 NULL, 1948 NULL, 1949 /*150*/ NULL, 1950 MatEliminateZeros_MPISBAIJ, 1951 NULL, 1952 NULL, 1953 NULL, 1954 NULL}; 1955 1956 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1957 { 1958 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1959 PetscInt i, mbs, Mbs; 1960 PetscMPIInt size; 1961 1962 PetscFunctionBegin; 1963 if (B->hash_active) { 1964 B->ops[0] = b->cops; 1965 B->hash_active = PETSC_FALSE; 1966 } 1967 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1968 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1969 PetscCall(PetscLayoutSetUp(B->rmap)); 1970 PetscCall(PetscLayoutSetUp(B->cmap)); 1971 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1972 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1973 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1974 1975 mbs = B->rmap->n / bs; 1976 Mbs = B->rmap->N / bs; 1977 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1978 1979 B->rmap->bs = bs; 1980 b->bs2 = bs * bs; 1981 b->mbs = mbs; 1982 b->Mbs = Mbs; 1983 b->nbs = B->cmap->n / bs; 1984 b->Nbs = B->cmap->N / bs; 1985 1986 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1987 b->rstartbs = B->rmap->rstart / bs; 1988 b->rendbs = B->rmap->rend / bs; 1989 1990 b->cstartbs = B->cmap->rstart / bs; 1991 b->cendbs = B->cmap->rend / bs; 1992 1993 #if defined(PETSC_USE_CTABLE) 1994 PetscCall(PetscHMapIDestroy(&b->colmap)); 1995 #else 1996 PetscCall(PetscFree(b->colmap)); 1997 #endif 1998 PetscCall(PetscFree(b->garray)); 1999 PetscCall(VecDestroy(&b->lvec)); 2000 PetscCall(VecScatterDestroy(&b->Mvctx)); 2001 PetscCall(VecDestroy(&b->slvec0)); 2002 PetscCall(VecDestroy(&b->slvec0b)); 2003 PetscCall(VecDestroy(&b->slvec1)); 2004 PetscCall(VecDestroy(&b->slvec1a)); 2005 PetscCall(VecDestroy(&b->slvec1b)); 2006 PetscCall(VecScatterDestroy(&b->sMvctx)); 2007 2008 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2009 2010 MatSeqXAIJGetOptions_Private(b->B); 2011 PetscCall(MatDestroy(&b->B)); 2012 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2013 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2014 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2015 MatSeqXAIJRestoreOptions_Private(b->B); 2016 2017 MatSeqXAIJGetOptions_Private(b->A); 2018 PetscCall(MatDestroy(&b->A)); 2019 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2020 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2021 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 2022 MatSeqXAIJRestoreOptions_Private(b->A); 2023 2024 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2025 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2026 2027 B->preallocated = PETSC_TRUE; 2028 B->was_assembled = PETSC_FALSE; 2029 B->assembled = PETSC_FALSE; 2030 PetscFunctionReturn(PETSC_SUCCESS); 2031 } 2032 2033 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2034 { 2035 PetscInt m, rstart, cend; 2036 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2037 const PetscInt *JJ = NULL; 2038 PetscScalar *values = NULL; 2039 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2040 PetscBool nooffprocentries; 2041 2042 PetscFunctionBegin; 2043 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2044 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2045 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2046 PetscCall(PetscLayoutSetUp(B->rmap)); 2047 PetscCall(PetscLayoutSetUp(B->cmap)); 2048 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2049 m = B->rmap->n / bs; 2050 rstart = B->rmap->rstart / bs; 2051 cend = B->cmap->rend / bs; 2052 2053 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2054 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2055 for (i = 0; i < m; i++) { 2056 nz = ii[i + 1] - ii[i]; 2057 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2058 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2059 JJ = jj + ii[i]; 2060 bd = 0; 2061 for (j = 0; j < nz; j++) { 2062 if (*JJ >= i + rstart) break; 2063 JJ++; 2064 bd++; 2065 } 2066 d = 0; 2067 for (; j < nz; j++) { 2068 if (*JJ++ >= cend) break; 2069 d++; 2070 } 2071 d_nnz[i] = d; 2072 o_nnz[i] = nz - d - bd; 2073 nz = nz - bd; 2074 nz_max = PetscMax(nz_max, nz); 2075 } 2076 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2077 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2078 PetscCall(PetscFree2(d_nnz, o_nnz)); 2079 2080 values = (PetscScalar *)V; 2081 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2082 for (i = 0; i < m; i++) { 2083 PetscInt row = i + rstart; 2084 PetscInt ncols = ii[i + 1] - ii[i]; 2085 const PetscInt *icols = jj + ii[i]; 2086 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2087 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2088 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2089 } else { /* block ordering does not match so we can only insert one block at a time. */ 2090 PetscInt j; 2091 for (j = 0; j < ncols; j++) { 2092 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2093 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2094 } 2095 } 2096 } 2097 2098 if (!V) PetscCall(PetscFree(values)); 2099 nooffprocentries = B->nooffprocentries; 2100 B->nooffprocentries = PETSC_TRUE; 2101 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2102 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2103 B->nooffprocentries = nooffprocentries; 2104 2105 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2106 PetscFunctionReturn(PETSC_SUCCESS); 2107 } 2108 2109 /*MC 2110 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2111 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2112 the matrix is stored. 2113 2114 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2115 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2116 2117 Options Database Key: 2118 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2119 2120 Level: beginner 2121 2122 Note: 2123 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2124 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2125 2126 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2127 M*/ 2128 2129 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2130 { 2131 Mat_MPISBAIJ *b; 2132 PetscBool flg = PETSC_FALSE; 2133 2134 PetscFunctionBegin; 2135 PetscCall(PetscNew(&b)); 2136 B->data = (void *)b; 2137 B->ops[0] = MatOps_Values; 2138 2139 B->ops->destroy = MatDestroy_MPISBAIJ; 2140 B->ops->view = MatView_MPISBAIJ; 2141 B->assembled = PETSC_FALSE; 2142 B->insertmode = NOT_SET_VALUES; 2143 2144 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2145 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2146 2147 /* build local table of row and column ownerships */ 2148 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2149 2150 /* build cache for off array entries formed */ 2151 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2152 2153 b->donotstash = PETSC_FALSE; 2154 b->colmap = NULL; 2155 b->garray = NULL; 2156 b->roworiented = PETSC_TRUE; 2157 2158 /* stuff used in block assembly */ 2159 b->barray = NULL; 2160 2161 /* stuff used for matrix vector multiply */ 2162 b->lvec = NULL; 2163 b->Mvctx = NULL; 2164 b->slvec0 = NULL; 2165 b->slvec0b = NULL; 2166 b->slvec1 = NULL; 2167 b->slvec1a = NULL; 2168 b->slvec1b = NULL; 2169 b->sMvctx = NULL; 2170 2171 /* stuff for MatGetRow() */ 2172 b->rowindices = NULL; 2173 b->rowvalues = NULL; 2174 b->getrowactive = PETSC_FALSE; 2175 2176 /* hash table stuff */ 2177 b->ht = NULL; 2178 b->hd = NULL; 2179 b->ht_size = 0; 2180 b->ht_flag = PETSC_FALSE; 2181 b->ht_fact = 0; 2182 b->ht_total_ct = 0; 2183 b->ht_insert_ct = 0; 2184 2185 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2186 b->ijonly = PETSC_FALSE; 2187 2188 b->in_loc = NULL; 2189 b->v_loc = NULL; 2190 b->n_loc = 0; 2191 2192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2196 #if defined(PETSC_HAVE_ELEMENTAL) 2197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2198 #endif 2199 #if defined(PETSC_HAVE_SCALAPACK) 2200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2201 #endif 2202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2204 2205 B->symmetric = PETSC_BOOL3_TRUE; 2206 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2207 B->symmetry_eternal = PETSC_TRUE; 2208 B->structural_symmetry_eternal = PETSC_TRUE; 2209 #if defined(PETSC_USE_COMPLEX) 2210 B->hermitian = PETSC_BOOL3_FALSE; 2211 #else 2212 B->hermitian = PETSC_BOOL3_TRUE; 2213 #endif 2214 2215 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2216 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2217 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2218 if (flg) { 2219 PetscReal fact = 1.39; 2220 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2221 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2222 if (fact <= 1.0) fact = 1.39; 2223 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2224 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2225 } 2226 PetscOptionsEnd(); 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2231 /*MC 2232 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2233 2234 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2235 and `MATMPISBAIJ` otherwise. 2236 2237 Options Database Key: 2238 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2239 2240 Level: beginner 2241 2242 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2243 M*/ 2244 2245 /*@ 2246 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2247 the user should preallocate the matrix storage by setting the parameters 2248 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2249 performance can be increased by more than a factor of 50. 2250 2251 Collective 2252 2253 Input Parameters: 2254 + B - the matrix 2255 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2256 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2257 . d_nz - number of block nonzeros per block row in diagonal portion of local 2258 submatrix (same for all local rows) 2259 . d_nnz - array containing the number of block nonzeros in the various block rows 2260 in the upper triangular and diagonal part of the in diagonal portion of the local 2261 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2262 for the diagonal entry and set a value even if it is zero. 2263 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2264 submatrix (same for all local rows). 2265 - o_nnz - array containing the number of nonzeros in the various block rows of the 2266 off-diagonal portion of the local submatrix that is right of the diagonal 2267 (possibly different for each block row) or `NULL`. 2268 2269 Options Database Keys: 2270 + -mat_no_unroll - uses code that does not unroll the loops in the 2271 block calculations (much slower) 2272 - -mat_block_size - size of the blocks to use 2273 2274 Level: intermediate 2275 2276 Notes: 2277 2278 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2279 than it must be used on all processors that share the object for that argument. 2280 2281 If the *_nnz parameter is given then the *_nz parameter is ignored 2282 2283 Storage Information: 2284 For a square global matrix we define each processor's diagonal portion 2285 to be its local rows and the corresponding columns (a square submatrix); 2286 each processor's off-diagonal portion encompasses the remainder of the 2287 local matrix (a rectangular submatrix). 2288 2289 The user can specify preallocated storage for the diagonal part of 2290 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2291 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2292 memory allocation. Likewise, specify preallocated storage for the 2293 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2294 2295 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2296 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2297 You can also run with the option `-info` and look for messages with the string 2298 malloc in them to see if additional memory allocation was needed. 2299 2300 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2301 the figure below we depict these three local rows and all columns (0-11). 2302 2303 .vb 2304 0 1 2 3 4 5 6 7 8 9 10 11 2305 -------------------------- 2306 row 3 |. . . d d d o o o o o o 2307 row 4 |. . . d d d o o o o o o 2308 row 5 |. . . d d d o o o o o o 2309 -------------------------- 2310 .ve 2311 2312 Thus, any entries in the d locations are stored in the d (diagonal) 2313 submatrix, and any entries in the o locations are stored in the 2314 o (off-diagonal) submatrix. Note that the d matrix is stored in 2315 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2316 2317 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2318 plus the diagonal part of the d matrix, 2319 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2320 2321 In general, for PDE problems in which most nonzeros are near the diagonal, 2322 one expects `d_nz` >> `o_nz`. 2323 2324 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2325 @*/ 2326 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2327 { 2328 PetscFunctionBegin; 2329 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2330 PetscValidType(B, 1); 2331 PetscValidLogicalCollectiveInt(B, bs, 2); 2332 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2333 PetscFunctionReturn(PETSC_SUCCESS); 2334 } 2335 2336 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2337 /*@ 2338 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2339 (block compressed row). For good matrix assembly performance 2340 the user should preallocate the matrix storage by setting the parameters 2341 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2342 2343 Collective 2344 2345 Input Parameters: 2346 + comm - MPI communicator 2347 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2348 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2349 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2350 This value should be the same as the local size used in creating the 2351 y vector for the matrix-vector product y = Ax. 2352 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2353 This value should be the same as the local size used in creating the 2354 x vector for the matrix-vector product y = Ax. 2355 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2356 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2357 . d_nz - number of block nonzeros per block row in diagonal portion of local 2358 submatrix (same for all local rows) 2359 . d_nnz - array containing the number of block nonzeros in the various block rows 2360 in the upper triangular portion of the in diagonal portion of the local 2361 (possibly different for each block block row) or `NULL`. 2362 If you plan to factor the matrix you must leave room for the diagonal entry and 2363 set its value even if it is zero. 2364 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2365 submatrix (same for all local rows). 2366 - o_nnz - array containing the number of nonzeros in the various block rows of the 2367 off-diagonal portion of the local submatrix (possibly different for 2368 each block row) or `NULL`. 2369 2370 Output Parameter: 2371 . A - the matrix 2372 2373 Options Database Keys: 2374 + -mat_no_unroll - uses code that does not unroll the loops in the 2375 block calculations (much slower) 2376 . -mat_block_size - size of the blocks to use 2377 - -mat_mpi - use the parallel matrix data structures even on one processor 2378 (defaults to using SeqBAIJ format on one processor) 2379 2380 Level: intermediate 2381 2382 Notes: 2383 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2384 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2385 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2386 2387 The number of rows and columns must be divisible by blocksize. 2388 This matrix type does not support complex Hermitian operation. 2389 2390 The user MUST specify either the local or global matrix dimensions 2391 (possibly both). 2392 2393 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2394 than it must be used on all processors that share the object for that argument. 2395 2396 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by 2397 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2398 2399 If the *_nnz parameter is given then the *_nz parameter is ignored 2400 2401 Storage Information: 2402 For a square global matrix we define each processor's diagonal portion 2403 to be its local rows and the corresponding columns (a square submatrix); 2404 each processor's off-diagonal portion encompasses the remainder of the 2405 local matrix (a rectangular submatrix). 2406 2407 The user can specify preallocated storage for the diagonal part of 2408 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2409 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2410 memory allocation. Likewise, specify preallocated storage for the 2411 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2412 2413 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2414 the figure below we depict these three local rows and all columns (0-11). 2415 2416 .vb 2417 0 1 2 3 4 5 6 7 8 9 10 11 2418 -------------------------- 2419 row 3 |. . . d d d o o o o o o 2420 row 4 |. . . d d d o o o o o o 2421 row 5 |. . . d d d o o o o o o 2422 -------------------------- 2423 .ve 2424 2425 Thus, any entries in the d locations are stored in the d (diagonal) 2426 submatrix, and any entries in the o locations are stored in the 2427 o (off-diagonal) submatrix. Note that the d matrix is stored in 2428 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2429 2430 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2431 plus the diagonal part of the d matrix, 2432 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2433 In general, for PDE problems in which most nonzeros are near the diagonal, 2434 one expects `d_nz` >> `o_nz`. 2435 2436 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2437 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2438 @*/ 2439 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2440 { 2441 PetscMPIInt size; 2442 2443 PetscFunctionBegin; 2444 PetscCall(MatCreate(comm, A)); 2445 PetscCall(MatSetSizes(*A, m, n, M, N)); 2446 PetscCallMPI(MPI_Comm_size(comm, &size)); 2447 if (size > 1) { 2448 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2449 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2450 } else { 2451 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2452 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2453 } 2454 PetscFunctionReturn(PETSC_SUCCESS); 2455 } 2456 2457 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2458 { 2459 Mat mat; 2460 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2461 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2462 PetscScalar *array; 2463 2464 PetscFunctionBegin; 2465 *newmat = NULL; 2466 2467 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2468 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2469 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2470 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2471 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2472 2473 if (matin->hash_active) { 2474 PetscCall(MatSetUp(mat)); 2475 } else { 2476 mat->factortype = matin->factortype; 2477 mat->preallocated = PETSC_TRUE; 2478 mat->assembled = PETSC_TRUE; 2479 mat->insertmode = NOT_SET_VALUES; 2480 2481 a = (Mat_MPISBAIJ *)mat->data; 2482 a->bs2 = oldmat->bs2; 2483 a->mbs = oldmat->mbs; 2484 a->nbs = oldmat->nbs; 2485 a->Mbs = oldmat->Mbs; 2486 a->Nbs = oldmat->Nbs; 2487 2488 a->size = oldmat->size; 2489 a->rank = oldmat->rank; 2490 a->donotstash = oldmat->donotstash; 2491 a->roworiented = oldmat->roworiented; 2492 a->rowindices = NULL; 2493 a->rowvalues = NULL; 2494 a->getrowactive = PETSC_FALSE; 2495 a->barray = NULL; 2496 a->rstartbs = oldmat->rstartbs; 2497 a->rendbs = oldmat->rendbs; 2498 a->cstartbs = oldmat->cstartbs; 2499 a->cendbs = oldmat->cendbs; 2500 2501 /* hash table stuff */ 2502 a->ht = NULL; 2503 a->hd = NULL; 2504 a->ht_size = 0; 2505 a->ht_flag = oldmat->ht_flag; 2506 a->ht_fact = oldmat->ht_fact; 2507 a->ht_total_ct = 0; 2508 a->ht_insert_ct = 0; 2509 2510 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2511 if (oldmat->colmap) { 2512 #if defined(PETSC_USE_CTABLE) 2513 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2514 #else 2515 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2516 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2517 #endif 2518 } else a->colmap = NULL; 2519 2520 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2521 PetscCall(PetscMalloc1(len, &a->garray)); 2522 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2523 } else a->garray = NULL; 2524 2525 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2526 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2527 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2528 2529 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2530 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2531 2532 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2533 PetscCall(VecGetArray(a->slvec1, &array)); 2534 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2535 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2536 PetscCall(VecRestoreArray(a->slvec1, &array)); 2537 PetscCall(VecGetArray(a->slvec0, &array)); 2538 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2539 PetscCall(VecRestoreArray(a->slvec0, &array)); 2540 2541 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2542 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2543 a->sMvctx = oldmat->sMvctx; 2544 2545 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2546 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2547 } 2548 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2549 *newmat = mat; 2550 PetscFunctionReturn(PETSC_SUCCESS); 2551 } 2552 2553 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2554 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2555 2556 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2557 { 2558 PetscBool isbinary; 2559 2560 PetscFunctionBegin; 2561 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2562 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2563 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2564 PetscFunctionReturn(PETSC_SUCCESS); 2565 } 2566 2567 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2568 { 2569 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2570 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2571 PetscReal atmp; 2572 PetscReal *work, *svalues, *rvalues; 2573 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2574 PetscMPIInt rank, size; 2575 PetscInt *rowners_bs, count, source; 2576 PetscScalar *va; 2577 MatScalar *ba; 2578 MPI_Status stat; 2579 2580 PetscFunctionBegin; 2581 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2582 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2583 PetscCall(VecGetArray(v, &va)); 2584 2585 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2586 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2587 2588 bs = A->rmap->bs; 2589 mbs = a->mbs; 2590 Mbs = a->Mbs; 2591 ba = b->a; 2592 bi = b->i; 2593 bj = b->j; 2594 2595 /* find ownerships */ 2596 rowners_bs = A->rmap->range; 2597 2598 /* each proc creates an array to be distributed */ 2599 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2600 2601 /* row_max for B */ 2602 if (rank != size - 1) { 2603 for (i = 0; i < mbs; i++) { 2604 ncols = bi[1] - bi[0]; 2605 bi++; 2606 brow = bs * i; 2607 for (j = 0; j < ncols; j++) { 2608 bcol = bs * (*bj); 2609 for (kcol = 0; kcol < bs; kcol++) { 2610 col = bcol + kcol; /* local col index */ 2611 col += rowners_bs[rank + 1]; /* global col index */ 2612 for (krow = 0; krow < bs; krow++) { 2613 atmp = PetscAbsScalar(*ba); 2614 ba++; 2615 row = brow + krow; /* local row index */ 2616 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2617 if (work[col] < atmp) work[col] = atmp; 2618 } 2619 } 2620 bj++; 2621 } 2622 } 2623 2624 /* send values to its owners */ 2625 for (PetscMPIInt dest = rank + 1; dest < size; dest++) { 2626 svalues = work + rowners_bs[dest]; 2627 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2628 PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2629 } 2630 } 2631 2632 /* receive values */ 2633 if (rank) { 2634 rvalues = work; 2635 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2636 for (source = 0; source < rank; source++) { 2637 PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2638 /* process values */ 2639 for (i = 0; i < count; i++) { 2640 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2641 } 2642 } 2643 } 2644 2645 PetscCall(VecRestoreArray(v, &va)); 2646 PetscCall(PetscFree(work)); 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2651 { 2652 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2653 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2654 PetscScalar *x, *ptr, *from; 2655 Vec bb1; 2656 const PetscScalar *b; 2657 2658 PetscFunctionBegin; 2659 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2660 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2661 2662 if (flag == SOR_APPLY_UPPER) { 2663 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2664 PetscFunctionReturn(PETSC_SUCCESS); 2665 } 2666 2667 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2668 if (flag & SOR_ZERO_INITIAL_GUESS) { 2669 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2670 its--; 2671 } 2672 2673 PetscCall(VecDuplicate(bb, &bb1)); 2674 while (its--) { 2675 /* lower triangular part: slvec0b = - B^T*xx */ 2676 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2677 2678 /* copy xx into slvec0a */ 2679 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2680 PetscCall(VecGetArray(xx, &x)); 2681 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2682 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2683 2684 PetscCall(VecScale(mat->slvec0, -1.0)); 2685 2686 /* copy bb into slvec1a */ 2687 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2688 PetscCall(VecGetArrayRead(bb, &b)); 2689 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2690 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2691 2692 /* set slvec1b = 0 */ 2693 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2694 PetscCall(VecZeroEntries(mat->slvec1b)); 2695 2696 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2697 PetscCall(VecRestoreArray(xx, &x)); 2698 PetscCall(VecRestoreArrayRead(bb, &b)); 2699 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2700 2701 /* upper triangular part: bb1 = bb1 - B*x */ 2702 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2703 2704 /* local diagonal sweep */ 2705 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2706 } 2707 PetscCall(VecDestroy(&bb1)); 2708 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2709 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2710 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2711 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2712 } else if (flag & SOR_EISENSTAT) { 2713 Vec xx1; 2714 PetscBool hasop; 2715 const PetscScalar *diag; 2716 PetscScalar *sl, scale = (omega - 2.0) / omega; 2717 PetscInt i, n; 2718 2719 if (!mat->xx1) { 2720 PetscCall(VecDuplicate(bb, &mat->xx1)); 2721 PetscCall(VecDuplicate(bb, &mat->bb1)); 2722 } 2723 xx1 = mat->xx1; 2724 bb1 = mat->bb1; 2725 2726 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2727 2728 if (!mat->diag) { 2729 /* this is wrong for same matrix with new nonzero values */ 2730 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2731 PetscCall(MatGetDiagonal(matin, mat->diag)); 2732 } 2733 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2734 2735 if (hasop) { 2736 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2737 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2738 } else { 2739 /* 2740 These two lines are replaced by code that may be a bit faster for a good compiler 2741 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2742 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2743 */ 2744 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2745 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2746 PetscCall(VecGetArrayRead(bb, &b)); 2747 PetscCall(VecGetArray(xx, &x)); 2748 PetscCall(VecGetLocalSize(xx, &n)); 2749 if (omega == 1.0) { 2750 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2751 PetscCall(PetscLogFlops(2.0 * n)); 2752 } else { 2753 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2754 PetscCall(PetscLogFlops(3.0 * n)); 2755 } 2756 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2757 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2758 PetscCall(VecRestoreArrayRead(bb, &b)); 2759 PetscCall(VecRestoreArray(xx, &x)); 2760 } 2761 2762 /* multiply off-diagonal portion of matrix */ 2763 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2764 PetscCall(VecZeroEntries(mat->slvec1b)); 2765 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2766 PetscCall(VecGetArray(mat->slvec0, &from)); 2767 PetscCall(VecGetArray(xx, &x)); 2768 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2769 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2770 PetscCall(VecRestoreArray(xx, &x)); 2771 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2772 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2773 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2774 2775 /* local sweep */ 2776 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2777 PetscCall(VecAXPY(xx, 1.0, xx1)); 2778 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2779 PetscFunctionReturn(PETSC_SUCCESS); 2780 } 2781 2782 /*@ 2783 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2784 2785 Collective 2786 2787 Input Parameters: 2788 + comm - MPI communicator 2789 . bs - the block size, only a block size of 1 is supported 2790 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2791 . n - This value should be the same as the local size used in creating the 2792 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2793 calculated if `N` is given) For square matrices `n` is almost always `m`. 2794 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2795 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2796 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2797 . j - column indices 2798 - a - matrix values 2799 2800 Output Parameter: 2801 . mat - the matrix 2802 2803 Level: intermediate 2804 2805 Notes: 2806 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2807 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2808 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2809 2810 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2811 2812 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2813 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2814 @*/ 2815 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2816 { 2817 PetscFunctionBegin; 2818 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2819 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2820 PetscCall(MatCreate(comm, mat)); 2821 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2822 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2823 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2824 PetscFunctionReturn(PETSC_SUCCESS); 2825 } 2826 2827 /*@ 2828 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2829 2830 Collective 2831 2832 Input Parameters: 2833 + B - the matrix 2834 . bs - the block size 2835 . i - the indices into `j` for the start of each local row (indices start with zero) 2836 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2837 - v - optional values in the matrix, pass `NULL` if not provided 2838 2839 Level: advanced 2840 2841 Notes: 2842 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2843 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2844 called this routine. 2845 2846 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2847 and usually the numerical values as well 2848 2849 Any entries passed in that are below the diagonal are ignored 2850 2851 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2852 `MatCreateMPISBAIJWithArrays()` 2853 @*/ 2854 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2855 { 2856 PetscFunctionBegin; 2857 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2858 PetscFunctionReturn(PETSC_SUCCESS); 2859 } 2860 2861 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2862 { 2863 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2864 PetscInt *indx; 2865 PetscScalar *values; 2866 2867 PetscFunctionBegin; 2868 PetscCall(MatGetSize(inmat, &m, &N)); 2869 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2870 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2871 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2872 PetscInt *bindx, rmax = a->rmax, j; 2873 PetscMPIInt rank, size; 2874 2875 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2876 mbs = m / bs; 2877 Nbs = N / cbs; 2878 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2879 nbs = n / cbs; 2880 2881 PetscCall(PetscMalloc1(rmax, &bindx)); 2882 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2883 2884 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2885 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2886 if (rank == size - 1) { 2887 /* Check sum(nbs) = Nbs */ 2888 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2889 } 2890 2891 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2892 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2893 for (i = 0; i < mbs; i++) { 2894 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2895 nnz = nnz / bs; 2896 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2897 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2898 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2899 } 2900 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2901 PetscCall(PetscFree(bindx)); 2902 2903 PetscCall(MatCreate(comm, outmat)); 2904 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2905 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2906 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2907 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2908 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2909 MatPreallocateEnd(dnz, onz); 2910 } 2911 2912 /* numeric phase */ 2913 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2914 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2915 2916 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2917 for (i = 0; i < m; i++) { 2918 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2919 Ii = i + rstart; 2920 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2921 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2922 } 2923 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2924 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2925 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928