1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 #if defined(PETSC_USE_LOG) 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 #endif 14 PetscCall(MatStashDestroy_Private(&mat->stash)); 15 PetscCall(MatStashDestroy_Private(&mat->bstash)); 16 PetscCall(MatDestroy(&baij->A)); 17 PetscCall(MatDestroy(&baij->B)); 18 #if defined(PETSC_USE_CTABLE) 19 PetscCall(PetscHMapIDestroy(&baij->colmap)); 20 #else 21 PetscCall(PetscFree(baij->colmap)); 22 #endif 23 PetscCall(PetscFree(baij->garray)); 24 PetscCall(VecDestroy(&baij->lvec)); 25 PetscCall(VecScatterDestroy(&baij->Mvctx)); 26 PetscCall(VecDestroy(&baij->slvec0)); 27 PetscCall(VecDestroy(&baij->slvec0b)); 28 PetscCall(VecDestroy(&baij->slvec1)); 29 PetscCall(VecDestroy(&baij->slvec1a)); 30 PetscCall(VecDestroy(&baij->slvec1b)); 31 PetscCall(VecScatterDestroy(&baij->sMvctx)); 32 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 33 PetscCall(PetscFree(baij->barray)); 34 PetscCall(PetscFree(baij->hd)); 35 PetscCall(VecDestroy(&baij->diag)); 36 PetscCall(VecDestroy(&baij->bb1)); 37 PetscCall(VecDestroy(&baij->xx1)); 38 #if defined(PETSC_USE_REAL_MAT_SINGLE) 39 PetscCall(PetscFree(baij->setvaluescopy)); 40 #endif 41 PetscCall(PetscFree(baij->in_loc)); 42 PetscCall(PetscFree(baij->v_loc)); 43 PetscCall(PetscFree(baij->rangebs)); 44 PetscCall(PetscFree(mat->data)); 45 46 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 51 #if defined(PETSC_HAVE_ELEMENTAL) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 53 #endif 54 #if defined(PETSC_HAVE_SCALAPACK) 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 56 #endif 57 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 58 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 59 PetscFunctionReturn(PETSC_SUCCESS); 60 } 61 62 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 63 #define TYPE SBAIJ 64 #define TYPE_SBAIJ 65 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 66 #undef TYPE 67 #undef TYPE_SBAIJ 68 69 #if defined(PETSC_HAVE_ELEMENTAL) 70 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 71 #endif 72 #if defined(PETSC_HAVE_SCALAPACK) 73 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 74 #endif 75 76 /* This could be moved to matimpl.h */ 77 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 78 { 79 Mat preallocator; 80 PetscInt r, rstart, rend; 81 PetscInt bs, i, m, n, M, N; 82 PetscBool cong = PETSC_TRUE; 83 84 PetscFunctionBegin; 85 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 86 PetscValidLogicalCollectiveInt(B, nm, 2); 87 for (i = 0; i < nm; i++) { 88 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 89 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 90 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 91 } 92 PetscValidLogicalCollectiveBool(B, fill, 5); 93 PetscCall(MatGetBlockSize(B, &bs)); 94 PetscCall(MatGetSize(B, &M, &N)); 95 PetscCall(MatGetLocalSize(B, &m, &n)); 96 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 97 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 98 PetscCall(MatSetBlockSize(preallocator, bs)); 99 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 100 PetscCall(MatSetUp(preallocator)); 101 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 102 for (r = rstart; r < rend; ++r) { 103 PetscInt ncols; 104 const PetscInt *row; 105 const PetscScalar *vals; 106 107 for (i = 0; i < nm; i++) { 108 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 109 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 110 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 111 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 112 } 113 } 114 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 115 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 116 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 117 PetscCall(MatDestroy(&preallocator)); 118 PetscFunctionReturn(PETSC_SUCCESS); 119 } 120 121 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 122 { 123 Mat B; 124 PetscInt r; 125 126 PetscFunctionBegin; 127 if (reuse != MAT_REUSE_MATRIX) { 128 PetscBool symm = PETSC_TRUE, isdense; 129 PetscInt bs; 130 131 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 132 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 133 PetscCall(MatSetType(B, newtype)); 134 PetscCall(MatGetBlockSize(A, &bs)); 135 PetscCall(MatSetBlockSize(B, bs)); 136 PetscCall(PetscLayoutSetUp(B->rmap)); 137 PetscCall(PetscLayoutSetUp(B->cmap)); 138 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 139 if (!isdense) { 140 PetscCall(MatGetRowUpperTriangular(A)); 141 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 142 PetscCall(MatRestoreRowUpperTriangular(A)); 143 } else { 144 PetscCall(MatSetUp(B)); 145 } 146 } else { 147 B = *newmat; 148 PetscCall(MatZeroEntries(B)); 149 } 150 151 PetscCall(MatGetRowUpperTriangular(A)); 152 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 153 PetscInt ncols; 154 const PetscInt *row; 155 const PetscScalar *vals; 156 157 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 158 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 159 #if defined(PETSC_USE_COMPLEX) 160 if (A->hermitian == PETSC_BOOL3_TRUE) { 161 PetscInt i; 162 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 163 } else { 164 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 165 } 166 #else 167 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 168 #endif 169 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 170 } 171 PetscCall(MatRestoreRowUpperTriangular(A)); 172 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 173 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 174 175 if (reuse == MAT_INPLACE_MATRIX) { 176 PetscCall(MatHeaderReplace(A, &B)); 177 } else { 178 *newmat = B; 179 } 180 PetscFunctionReturn(PETSC_SUCCESS); 181 } 182 183 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 184 { 185 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 186 187 PetscFunctionBegin; 188 PetscCall(MatStoreValues(aij->A)); 189 PetscCall(MatStoreValues(aij->B)); 190 PetscFunctionReturn(PETSC_SUCCESS); 191 } 192 193 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 194 { 195 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 196 197 PetscFunctionBegin; 198 PetscCall(MatRetrieveValues(aij->A)); 199 PetscCall(MatRetrieveValues(aij->B)); 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 204 do { \ 205 brow = row / bs; \ 206 rp = aj + ai[brow]; \ 207 ap = aa + bs2 * ai[brow]; \ 208 rmax = aimax[brow]; \ 209 nrow = ailen[brow]; \ 210 bcol = col / bs; \ 211 ridx = row % bs; \ 212 cidx = col % bs; \ 213 low = 0; \ 214 high = nrow; \ 215 while (high - low > 3) { \ 216 t = (low + high) / 2; \ 217 if (rp[t] > bcol) high = t; \ 218 else low = t; \ 219 } \ 220 for (_i = low; _i < high; _i++) { \ 221 if (rp[_i] > bcol) break; \ 222 if (rp[_i] == bcol) { \ 223 bap = ap + bs2 * _i + bs * cidx + ridx; \ 224 if (addv == ADD_VALUES) *bap += value; \ 225 else *bap = value; \ 226 goto a_noinsert; \ 227 } \ 228 } \ 229 if (a->nonew == 1) goto a_noinsert; \ 230 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 231 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 232 N = nrow++ - 1; \ 233 /* shift up all the later entries in this row */ \ 234 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 235 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 236 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 237 rp[_i] = bcol; \ 238 ap[bs2 * _i + bs * cidx + ridx] = value; \ 239 A->nonzerostate++; \ 240 a_noinsert:; \ 241 ailen[brow] = nrow; \ 242 } while (0) 243 244 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 245 do { \ 246 brow = row / bs; \ 247 rp = bj + bi[brow]; \ 248 ap = ba + bs2 * bi[brow]; \ 249 rmax = bimax[brow]; \ 250 nrow = bilen[brow]; \ 251 bcol = col / bs; \ 252 ridx = row % bs; \ 253 cidx = col % bs; \ 254 low = 0; \ 255 high = nrow; \ 256 while (high - low > 3) { \ 257 t = (low + high) / 2; \ 258 if (rp[t] > bcol) high = t; \ 259 else low = t; \ 260 } \ 261 for (_i = low; _i < high; _i++) { \ 262 if (rp[_i] > bcol) break; \ 263 if (rp[_i] == bcol) { \ 264 bap = ap + bs2 * _i + bs * cidx + ridx; \ 265 if (addv == ADD_VALUES) *bap += value; \ 266 else *bap = value; \ 267 goto b_noinsert; \ 268 } \ 269 } \ 270 if (b->nonew == 1) goto b_noinsert; \ 271 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 272 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 273 N = nrow++ - 1; \ 274 /* shift up all the later entries in this row */ \ 275 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 276 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 277 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 278 rp[_i] = bcol; \ 279 ap[bs2 * _i + bs * cidx + ridx] = value; \ 280 B->nonzerostate++; \ 281 b_noinsert:; \ 282 bilen[brow] = nrow; \ 283 } while (0) 284 285 /* Only add/insert a(i,j) with i<=j (blocks). 286 Any a(i,j) with i>j input by user is ignored or generates an error 287 */ 288 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 289 { 290 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 291 MatScalar value; 292 PetscBool roworiented = baij->roworiented; 293 PetscInt i, j, row, col; 294 PetscInt rstart_orig = mat->rmap->rstart; 295 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 296 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 297 298 /* Some Variables required in the macro */ 299 Mat A = baij->A; 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 301 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 302 MatScalar *aa = a->a; 303 304 Mat B = baij->B; 305 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 306 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 307 MatScalar *ba = b->a; 308 309 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 310 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 311 MatScalar *ap, *bap; 312 313 /* for stash */ 314 PetscInt n_loc, *in_loc = NULL; 315 MatScalar *v_loc = NULL; 316 317 PetscFunctionBegin; 318 if (!baij->donotstash) { 319 if (n > baij->n_loc) { 320 PetscCall(PetscFree(baij->in_loc)); 321 PetscCall(PetscFree(baij->v_loc)); 322 PetscCall(PetscMalloc1(n, &baij->in_loc)); 323 PetscCall(PetscMalloc1(n, &baij->v_loc)); 324 325 baij->n_loc = n; 326 } 327 in_loc = baij->in_loc; 328 v_loc = baij->v_loc; 329 } 330 331 for (i = 0; i < m; i++) { 332 if (im[i] < 0) continue; 333 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 334 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 335 row = im[i] - rstart_orig; /* local row index */ 336 for (j = 0; j < n; j++) { 337 if (im[i] / bs > in[j] / bs) { 338 if (a->ignore_ltriangular) { 339 continue; /* ignore lower triangular blocks */ 340 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 341 } 342 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 343 col = in[j] - cstart_orig; /* local col index */ 344 brow = row / bs; 345 bcol = col / bs; 346 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 347 if (roworiented) value = v[i * n + j]; 348 else value = v[i + j * m]; 349 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 350 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 351 } else if (in[j] < 0) { 352 continue; 353 } else { 354 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 355 /* off-diag entry (B) */ 356 if (mat->was_assembled) { 357 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 358 #if defined(PETSC_USE_CTABLE) 359 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 360 col = col - 1; 361 #else 362 col = baij->colmap[in[j] / bs] - 1; 363 #endif 364 if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 365 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 366 col = in[j]; 367 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 368 B = baij->B; 369 b = (Mat_SeqBAIJ *)(B)->data; 370 bimax = b->imax; 371 bi = b->i; 372 bilen = b->ilen; 373 bj = b->j; 374 ba = b->a; 375 } else col += in[j] % bs; 376 } else col = in[j]; 377 if (roworiented) value = v[i * n + j]; 378 else value = v[i + j * m]; 379 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 380 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 381 } 382 } 383 } else { /* off processor entry */ 384 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 385 if (!baij->donotstash) { 386 mat->assembled = PETSC_FALSE; 387 n_loc = 0; 388 for (j = 0; j < n; j++) { 389 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 390 in_loc[n_loc] = in[j]; 391 if (roworiented) { 392 v_loc[n_loc] = v[i * n + j]; 393 } else { 394 v_loc[n_loc] = v[j * m + i]; 395 } 396 n_loc++; 397 } 398 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 399 } 400 } 401 } 402 PetscFunctionReturn(PETSC_SUCCESS); 403 } 404 405 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 406 { 407 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 408 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 409 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 410 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 411 PetscBool roworiented = a->roworiented; 412 const PetscScalar *value = v; 413 MatScalar *ap, *aa = a->a, *bap; 414 415 PetscFunctionBegin; 416 if (col < row) { 417 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 418 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 419 } 420 rp = aj + ai[row]; 421 ap = aa + bs2 * ai[row]; 422 rmax = imax[row]; 423 nrow = ailen[row]; 424 value = v; 425 low = 0; 426 high = nrow; 427 428 while (high - low > 7) { 429 t = (low + high) / 2; 430 if (rp[t] > col) high = t; 431 else low = t; 432 } 433 for (i = low; i < high; i++) { 434 if (rp[i] > col) break; 435 if (rp[i] == col) { 436 bap = ap + bs2 * i; 437 if (roworiented) { 438 if (is == ADD_VALUES) { 439 for (ii = 0; ii < bs; ii++) { 440 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 441 } 442 } else { 443 for (ii = 0; ii < bs; ii++) { 444 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 445 } 446 } 447 } else { 448 if (is == ADD_VALUES) { 449 for (ii = 0; ii < bs; ii++) { 450 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 451 } 452 } else { 453 for (ii = 0; ii < bs; ii++) { 454 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 455 } 456 } 457 } 458 goto noinsert2; 459 } 460 } 461 if (nonew == 1) goto noinsert2; 462 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 463 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 464 N = nrow++ - 1; 465 high++; 466 /* shift up all the later entries in this row */ 467 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 468 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 469 rp[i] = col; 470 bap = ap + bs2 * i; 471 if (roworiented) { 472 for (ii = 0; ii < bs; ii++) { 473 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 474 } 475 } else { 476 for (ii = 0; ii < bs; ii++) { 477 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 478 } 479 } 480 noinsert2:; 481 ailen[row] = nrow; 482 PetscFunctionReturn(PETSC_SUCCESS); 483 } 484 485 /* 486 This routine is exactly duplicated in mpibaij.c 487 */ 488 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 489 { 490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 491 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 492 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 493 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 494 PetscBool roworiented = a->roworiented; 495 const PetscScalar *value = v; 496 MatScalar *ap, *aa = a->a, *bap; 497 498 PetscFunctionBegin; 499 rp = aj + ai[row]; 500 ap = aa + bs2 * ai[row]; 501 rmax = imax[row]; 502 nrow = ailen[row]; 503 low = 0; 504 high = nrow; 505 value = v; 506 while (high - low > 7) { 507 t = (low + high) / 2; 508 if (rp[t] > col) high = t; 509 else low = t; 510 } 511 for (i = low; i < high; i++) { 512 if (rp[i] > col) break; 513 if (rp[i] == col) { 514 bap = ap + bs2 * i; 515 if (roworiented) { 516 if (is == ADD_VALUES) { 517 for (ii = 0; ii < bs; ii++) { 518 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 519 } 520 } else { 521 for (ii = 0; ii < bs; ii++) { 522 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 523 } 524 } 525 } else { 526 if (is == ADD_VALUES) { 527 for (ii = 0; ii < bs; ii++, value += bs) { 528 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 529 bap += bs; 530 } 531 } else { 532 for (ii = 0; ii < bs; ii++, value += bs) { 533 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 534 bap += bs; 535 } 536 } 537 } 538 goto noinsert2; 539 } 540 } 541 if (nonew == 1) goto noinsert2; 542 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 543 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 544 N = nrow++ - 1; 545 high++; 546 /* shift up all the later entries in this row */ 547 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 548 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 549 rp[i] = col; 550 bap = ap + bs2 * i; 551 if (roworiented) { 552 for (ii = 0; ii < bs; ii++) { 553 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 554 } 555 } else { 556 for (ii = 0; ii < bs; ii++) { 557 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 558 } 559 } 560 noinsert2:; 561 ailen[row] = nrow; 562 PetscFunctionReturn(PETSC_SUCCESS); 563 } 564 565 /* 566 This routine could be optimized by removing the need for the block copy below and passing stride information 567 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 568 */ 569 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 570 { 571 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 572 const MatScalar *value; 573 MatScalar *barray = baij->barray; 574 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 575 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 576 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 577 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 578 579 PetscFunctionBegin; 580 if (!barray) { 581 PetscCall(PetscMalloc1(bs2, &barray)); 582 baij->barray = barray; 583 } 584 585 if (roworiented) { 586 stepval = (n - 1) * bs; 587 } else { 588 stepval = (m - 1) * bs; 589 } 590 for (i = 0; i < m; i++) { 591 if (im[i] < 0) continue; 592 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 593 if (im[i] >= rstart && im[i] < rend) { 594 row = im[i] - rstart; 595 for (j = 0; j < n; j++) { 596 if (im[i] > in[j]) { 597 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 598 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 599 } 600 /* If NumCol = 1 then a copy is not required */ 601 if ((roworiented) && (n == 1)) { 602 barray = (MatScalar *)v + i * bs2; 603 } else if ((!roworiented) && (m == 1)) { 604 barray = (MatScalar *)v + j * bs2; 605 } else { /* Here a copy is required */ 606 if (roworiented) { 607 value = v + i * (stepval + bs) * bs + j * bs; 608 } else { 609 value = v + j * (stepval + bs) * bs + i * bs; 610 } 611 for (ii = 0; ii < bs; ii++, value += stepval) { 612 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 613 } 614 barray -= bs2; 615 } 616 617 if (in[j] >= cstart && in[j] < cend) { 618 col = in[j] - cstart; 619 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 620 } else if (in[j] < 0) { 621 continue; 622 } else { 623 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 624 if (mat->was_assembled) { 625 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 626 627 #if defined(PETSC_USE_DEBUG) 628 #if defined(PETSC_USE_CTABLE) 629 { 630 PetscInt data; 631 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 632 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 633 } 634 #else 635 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 636 #endif 637 #endif 638 #if defined(PETSC_USE_CTABLE) 639 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 640 col = (col - 1) / bs; 641 #else 642 col = (baij->colmap[in[j]] - 1) / bs; 643 #endif 644 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 645 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 646 col = in[j]; 647 } 648 } else col = in[j]; 649 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 650 } 651 } 652 } else { 653 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 654 if (!baij->donotstash) { 655 if (roworiented) { 656 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 657 } else { 658 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 659 } 660 } 661 } 662 } 663 PetscFunctionReturn(PETSC_SUCCESS); 664 } 665 666 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 667 { 668 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 669 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 670 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 671 672 PetscFunctionBegin; 673 for (i = 0; i < m; i++) { 674 if (idxm[i] < 0) continue; /* negative row */ 675 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 676 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 677 row = idxm[i] - bsrstart; 678 for (j = 0; j < n; j++) { 679 if (idxn[j] < 0) continue; /* negative column */ 680 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 681 if (idxn[j] >= bscstart && idxn[j] < bscend) { 682 col = idxn[j] - bscstart; 683 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 684 } else { 685 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 686 #if defined(PETSC_USE_CTABLE) 687 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 688 data--; 689 #else 690 data = baij->colmap[idxn[j] / bs] - 1; 691 #endif 692 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 693 else { 694 col = data + idxn[j] % bs; 695 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 696 } 697 } 698 } 699 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 700 } 701 PetscFunctionReturn(PETSC_SUCCESS); 702 } 703 704 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 705 { 706 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 707 PetscReal sum[2], *lnorm2; 708 709 PetscFunctionBegin; 710 if (baij->size == 1) { 711 PetscCall(MatNorm(baij->A, type, norm)); 712 } else { 713 if (type == NORM_FROBENIUS) { 714 PetscCall(PetscMalloc1(2, &lnorm2)); 715 PetscCall(MatNorm(baij->A, type, lnorm2)); 716 *lnorm2 = (*lnorm2) * (*lnorm2); 717 lnorm2++; /* squar power of norm(A) */ 718 PetscCall(MatNorm(baij->B, type, lnorm2)); 719 *lnorm2 = (*lnorm2) * (*lnorm2); 720 lnorm2--; /* squar power of norm(B) */ 721 PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 722 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 723 PetscCall(PetscFree(lnorm2)); 724 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 725 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 726 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 727 PetscReal *rsum, *rsum2, vabs; 728 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 729 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 730 MatScalar *v; 731 732 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 733 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 734 /* Amat */ 735 v = amat->a; 736 jj = amat->j; 737 for (brow = 0; brow < mbs; brow++) { 738 grow = bs * (rstart + brow); 739 nz = amat->i[brow + 1] - amat->i[brow]; 740 for (bcol = 0; bcol < nz; bcol++) { 741 gcol = bs * (rstart + *jj); 742 jj++; 743 for (col = 0; col < bs; col++) { 744 for (row = 0; row < bs; row++) { 745 vabs = PetscAbsScalar(*v); 746 v++; 747 rsum[gcol + col] += vabs; 748 /* non-diagonal block */ 749 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 750 } 751 } 752 } 753 PetscCall(PetscLogFlops(nz * bs * bs)); 754 } 755 /* Bmat */ 756 v = bmat->a; 757 jj = bmat->j; 758 for (brow = 0; brow < mbs; brow++) { 759 grow = bs * (rstart + brow); 760 nz = bmat->i[brow + 1] - bmat->i[brow]; 761 for (bcol = 0; bcol < nz; bcol++) { 762 gcol = bs * garray[*jj]; 763 jj++; 764 for (col = 0; col < bs; col++) { 765 for (row = 0; row < bs; row++) { 766 vabs = PetscAbsScalar(*v); 767 v++; 768 rsum[gcol + col] += vabs; 769 rsum[grow + row] += vabs; 770 } 771 } 772 } 773 PetscCall(PetscLogFlops(nz * bs * bs)); 774 } 775 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 776 *norm = 0.0; 777 for (col = 0; col < mat->cmap->N; col++) { 778 if (rsum2[col] > *norm) *norm = rsum2[col]; 779 } 780 PetscCall(PetscFree2(rsum, rsum2)); 781 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 782 } 783 PetscFunctionReturn(PETSC_SUCCESS); 784 } 785 786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 787 { 788 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 789 PetscInt nstash, reallocs; 790 791 PetscFunctionBegin; 792 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 793 794 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 795 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 796 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 797 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 798 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 799 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 800 PetscFunctionReturn(PETSC_SUCCESS); 801 } 802 803 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 804 { 805 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 806 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 807 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 808 PetscInt *row, *col; 809 PetscBool other_disassembled; 810 PetscMPIInt n; 811 PetscBool r1, r2, r3; 812 MatScalar *val; 813 814 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 815 PetscFunctionBegin; 816 if (!baij->donotstash && !mat->nooffprocentries) { 817 while (1) { 818 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 819 if (!flg) break; 820 821 for (i = 0; i < n;) { 822 /* Now identify the consecutive vals belonging to the same row */ 823 for (j = i, rstart = row[j]; j < n; j++) { 824 if (row[j] != rstart) break; 825 } 826 if (j < n) ncols = j - i; 827 else ncols = n - i; 828 /* Now assemble all these values with a single function call */ 829 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 830 i = j; 831 } 832 } 833 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 834 /* Now process the block-stash. Since the values are stashed column-oriented, 835 set the roworiented flag to column oriented, and after MatSetValues() 836 restore the original flags */ 837 r1 = baij->roworiented; 838 r2 = a->roworiented; 839 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 840 841 baij->roworiented = PETSC_FALSE; 842 a->roworiented = PETSC_FALSE; 843 844 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 845 while (1) { 846 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 847 if (!flg) break; 848 849 for (i = 0; i < n;) { 850 /* Now identify the consecutive vals belonging to the same row */ 851 for (j = i, rstart = row[j]; j < n; j++) { 852 if (row[j] != rstart) break; 853 } 854 if (j < n) ncols = j - i; 855 else ncols = n - i; 856 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 857 i = j; 858 } 859 } 860 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 861 862 baij->roworiented = r1; 863 a->roworiented = r2; 864 865 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 866 } 867 868 PetscCall(MatAssemblyBegin(baij->A, mode)); 869 PetscCall(MatAssemblyEnd(baij->A, mode)); 870 871 /* determine if any processor has disassembled, if so we must 872 also disassemble ourselves, in order that we may reassemble. */ 873 /* 874 if nonzero structure of submatrix B cannot change then we know that 875 no processor disassembled thus we can skip this stuff 876 */ 877 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 878 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 879 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 880 } 881 882 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 883 PetscCall(MatAssemblyBegin(baij->B, mode)); 884 PetscCall(MatAssemblyEnd(baij->B, mode)); 885 886 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 887 888 baij->rowvalues = NULL; 889 890 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 891 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 892 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 893 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 894 } 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 899 #include <petscdraw.h> 900 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 901 { 902 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 903 PetscInt bs = mat->rmap->bs; 904 PetscMPIInt rank = baij->rank; 905 PetscBool iascii, isdraw; 906 PetscViewer sviewer; 907 PetscViewerFormat format; 908 909 PetscFunctionBegin; 910 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 911 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 912 if (iascii) { 913 PetscCall(PetscViewerGetFormat(viewer, &format)); 914 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 915 MatInfo info; 916 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 917 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 918 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 919 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 920 mat->rmap->bs, (double)info.memory)); 921 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 922 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 923 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 924 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 925 PetscCall(PetscViewerFlush(viewer)); 926 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 927 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 928 PetscCall(VecScatterView(baij->Mvctx, viewer)); 929 PetscFunctionReturn(PETSC_SUCCESS); 930 } else if (format == PETSC_VIEWER_ASCII_INFO) { 931 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 932 PetscFunctionReturn(PETSC_SUCCESS); 933 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 934 PetscFunctionReturn(PETSC_SUCCESS); 935 } 936 } 937 938 if (isdraw) { 939 PetscDraw draw; 940 PetscBool isnull; 941 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 942 PetscCall(PetscDrawIsNull(draw, &isnull)); 943 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 944 } 945 946 { 947 /* assemble the entire matrix onto first processor. */ 948 Mat A; 949 Mat_SeqSBAIJ *Aloc; 950 Mat_SeqBAIJ *Bloc; 951 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 952 MatScalar *a; 953 const char *matname; 954 955 /* Should this be the same type as mat? */ 956 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 957 if (rank == 0) { 958 PetscCall(MatSetSizes(A, M, N, M, N)); 959 } else { 960 PetscCall(MatSetSizes(A, 0, 0, M, N)); 961 } 962 PetscCall(MatSetType(A, MATMPISBAIJ)); 963 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 964 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 965 966 /* copy over the A part */ 967 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 968 ai = Aloc->i; 969 aj = Aloc->j; 970 a = Aloc->a; 971 PetscCall(PetscMalloc1(bs, &rvals)); 972 973 for (i = 0; i < mbs; i++) { 974 rvals[0] = bs * (baij->rstartbs + i); 975 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 976 for (j = ai[i]; j < ai[i + 1]; j++) { 977 col = (baij->cstartbs + aj[j]) * bs; 978 for (k = 0; k < bs; k++) { 979 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 980 col++; 981 a += bs; 982 } 983 } 984 } 985 /* copy over the B part */ 986 Bloc = (Mat_SeqBAIJ *)baij->B->data; 987 ai = Bloc->i; 988 aj = Bloc->j; 989 a = Bloc->a; 990 for (i = 0; i < mbs; i++) { 991 rvals[0] = bs * (baij->rstartbs + i); 992 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 993 for (j = ai[i]; j < ai[i + 1]; j++) { 994 col = baij->garray[aj[j]] * bs; 995 for (k = 0; k < bs; k++) { 996 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 997 col++; 998 a += bs; 999 } 1000 } 1001 } 1002 PetscCall(PetscFree(rvals)); 1003 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1004 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1005 /* 1006 Everyone has to call to draw the matrix since the graphics waits are 1007 synchronized across all processors that share the PetscDraw object 1008 */ 1009 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1010 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1011 if (rank == 0) { 1012 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 1013 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 1014 } 1015 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1016 PetscCall(PetscViewerFlush(viewer)); 1017 PetscCall(MatDestroy(&A)); 1018 } 1019 PetscFunctionReturn(PETSC_SUCCESS); 1020 } 1021 1022 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1023 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1024 1025 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1026 { 1027 PetscBool iascii, isdraw, issocket, isbinary; 1028 1029 PetscFunctionBegin; 1030 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1031 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1032 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1033 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1034 if (iascii || isdraw || issocket) { 1035 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 1036 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 1037 PetscFunctionReturn(PETSC_SUCCESS); 1038 } 1039 1040 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1041 { 1042 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1043 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1044 PetscScalar *from; 1045 const PetscScalar *x; 1046 1047 PetscFunctionBegin; 1048 /* diagonal part */ 1049 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1050 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1051 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1052 PetscCall(VecZeroEntries(a->slvec1b)); 1053 1054 /* subdiagonal part */ 1055 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1056 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1057 1058 /* copy x into the vec slvec0 */ 1059 PetscCall(VecGetArray(a->slvec0, &from)); 1060 PetscCall(VecGetArrayRead(xx, &x)); 1061 1062 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1063 PetscCall(VecRestoreArray(a->slvec0, &from)); 1064 PetscCall(VecRestoreArrayRead(xx, &x)); 1065 1066 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1067 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1068 /* supperdiagonal part */ 1069 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1070 PetscFunctionReturn(PETSC_SUCCESS); 1071 } 1072 1073 PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1074 { 1075 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1076 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1077 PetscScalar *from; 1078 const PetscScalar *x; 1079 1080 PetscFunctionBegin; 1081 /* diagonal part */ 1082 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1083 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1084 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1085 PetscCall(VecZeroEntries(a->slvec1b)); 1086 1087 /* subdiagonal part */ 1088 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1089 1090 /* copy x into the vec slvec0 */ 1091 PetscCall(VecGetArray(a->slvec0, &from)); 1092 PetscCall(VecGetArrayRead(xx, &x)); 1093 1094 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1095 PetscCall(VecRestoreArray(a->slvec0, &from)); 1096 PetscCall(VecRestoreArrayRead(xx, &x)); 1097 1098 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1099 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1100 /* supperdiagonal part */ 1101 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1106 { 1107 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1108 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1109 PetscScalar *from; 1110 const PetscScalar *x; 1111 1112 PetscFunctionBegin; 1113 /* diagonal part */ 1114 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1115 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1116 PetscCall(VecZeroEntries(a->slvec1b)); 1117 1118 /* subdiagonal part */ 1119 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1120 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1121 1122 /* copy x into the vec slvec0 */ 1123 PetscCall(VecGetArray(a->slvec0, &from)); 1124 PetscCall(VecGetArrayRead(xx, &x)); 1125 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1126 PetscCall(VecRestoreArray(a->slvec0, &from)); 1127 1128 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1129 PetscCall(VecRestoreArrayRead(xx, &x)); 1130 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1131 1132 /* supperdiagonal part */ 1133 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1138 { 1139 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1140 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1141 PetscScalar *from; 1142 const PetscScalar *x; 1143 1144 PetscFunctionBegin; 1145 /* diagonal part */ 1146 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1147 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1148 PetscCall(VecZeroEntries(a->slvec1b)); 1149 1150 /* subdiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1152 1153 /* copy x into the vec slvec0 */ 1154 PetscCall(VecGetArray(a->slvec0, &from)); 1155 PetscCall(VecGetArrayRead(xx, &x)); 1156 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1157 PetscCall(VecRestoreArray(a->slvec0, &from)); 1158 1159 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1160 PetscCall(VecRestoreArrayRead(xx, &x)); 1161 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1162 1163 /* supperdiagonal part */ 1164 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 /* 1169 This only works correctly for square matrices where the subblock A->A is the 1170 diagonal block 1171 */ 1172 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1173 { 1174 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1175 1176 PetscFunctionBegin; 1177 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1178 PetscCall(MatGetDiagonal(a->A, v)); 1179 PetscFunctionReturn(PETSC_SUCCESS); 1180 } 1181 1182 PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1183 { 1184 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1185 1186 PetscFunctionBegin; 1187 PetscCall(MatScale(a->A, aa)); 1188 PetscCall(MatScale(a->B, aa)); 1189 PetscFunctionReturn(PETSC_SUCCESS); 1190 } 1191 1192 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1193 { 1194 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1195 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1196 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1197 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1198 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1199 1200 PetscFunctionBegin; 1201 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1202 mat->getrowactive = PETSC_TRUE; 1203 1204 if (!mat->rowvalues && (idx || v)) { 1205 /* 1206 allocate enough space to hold information from the longest row. 1207 */ 1208 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1209 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1210 PetscInt max = 1, mbs = mat->mbs, tmp; 1211 for (i = 0; i < mbs; i++) { 1212 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1213 if (max < tmp) max = tmp; 1214 } 1215 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1216 } 1217 1218 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1219 lrow = row - brstart; /* local row index */ 1220 1221 pvA = &vworkA; 1222 pcA = &cworkA; 1223 pvB = &vworkB; 1224 pcB = &cworkB; 1225 if (!v) { 1226 pvA = NULL; 1227 pvB = NULL; 1228 } 1229 if (!idx) { 1230 pcA = NULL; 1231 if (!v) pcB = NULL; 1232 } 1233 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1234 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1235 nztot = nzA + nzB; 1236 1237 cmap = mat->garray; 1238 if (v || idx) { 1239 if (nztot) { 1240 /* Sort by increasing column numbers, assuming A and B already sorted */ 1241 PetscInt imark = -1; 1242 if (v) { 1243 *v = v_p = mat->rowvalues; 1244 for (i = 0; i < nzB; i++) { 1245 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1246 else break; 1247 } 1248 imark = i; 1249 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1250 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1251 } 1252 if (idx) { 1253 *idx = idx_p = mat->rowindices; 1254 if (imark > -1) { 1255 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1256 } else { 1257 for (i = 0; i < nzB; i++) { 1258 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1259 else break; 1260 } 1261 imark = i; 1262 } 1263 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1264 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1265 } 1266 } else { 1267 if (idx) *idx = NULL; 1268 if (v) *v = NULL; 1269 } 1270 } 1271 *nz = nztot; 1272 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1273 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1278 { 1279 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1280 1281 PetscFunctionBegin; 1282 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1283 baij->getrowactive = PETSC_FALSE; 1284 PetscFunctionReturn(PETSC_SUCCESS); 1285 } 1286 1287 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1288 { 1289 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1290 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1291 1292 PetscFunctionBegin; 1293 aA->getrow_utriangular = PETSC_TRUE; 1294 PetscFunctionReturn(PETSC_SUCCESS); 1295 } 1296 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1297 { 1298 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1299 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1300 1301 PetscFunctionBegin; 1302 aA->getrow_utriangular = PETSC_FALSE; 1303 PetscFunctionReturn(PETSC_SUCCESS); 1304 } 1305 1306 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1307 { 1308 PetscFunctionBegin; 1309 if (PetscDefined(USE_COMPLEX)) { 1310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1311 1312 PetscCall(MatConjugate(a->A)); 1313 PetscCall(MatConjugate(a->B)); 1314 } 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 1318 PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1319 { 1320 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1321 1322 PetscFunctionBegin; 1323 PetscCall(MatRealPart(a->A)); 1324 PetscCall(MatRealPart(a->B)); 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1329 { 1330 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1331 1332 PetscFunctionBegin; 1333 PetscCall(MatImaginaryPart(a->A)); 1334 PetscCall(MatImaginaryPart(a->B)); 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } 1337 1338 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1339 Input: isrow - distributed(parallel), 1340 iscol_local - locally owned (seq) 1341 */ 1342 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1343 { 1344 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1345 const PetscInt *ptr1, *ptr2; 1346 1347 PetscFunctionBegin; 1348 PetscCall(ISGetLocalSize(isrow, &sz1)); 1349 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1350 if (sz1 > sz2) { 1351 *flg = PETSC_FALSE; 1352 PetscFunctionReturn(PETSC_SUCCESS); 1353 } 1354 1355 PetscCall(ISGetIndices(isrow, &ptr1)); 1356 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1357 1358 PetscCall(PetscMalloc1(sz1, &a1)); 1359 PetscCall(PetscMalloc1(sz2, &a2)); 1360 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1361 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1362 PetscCall(PetscSortInt(sz1, a1)); 1363 PetscCall(PetscSortInt(sz2, a2)); 1364 1365 nmatch = 0; 1366 k = 0; 1367 for (i = 0; i < sz1; i++) { 1368 for (j = k; j < sz2; j++) { 1369 if (a1[i] == a2[j]) { 1370 k = j; 1371 nmatch++; 1372 break; 1373 } 1374 } 1375 } 1376 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1377 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1378 PetscCall(PetscFree(a1)); 1379 PetscCall(PetscFree(a2)); 1380 if (nmatch < sz1) { 1381 *flg = PETSC_FALSE; 1382 } else { 1383 *flg = PETSC_TRUE; 1384 } 1385 PetscFunctionReturn(PETSC_SUCCESS); 1386 } 1387 1388 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1389 { 1390 IS iscol_local; 1391 PetscInt csize; 1392 PetscBool isequal; 1393 1394 PetscFunctionBegin; 1395 PetscCall(ISGetLocalSize(iscol, &csize)); 1396 if (call == MAT_REUSE_MATRIX) { 1397 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1398 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1399 } else { 1400 PetscBool issorted; 1401 1402 PetscCall(ISAllGather(iscol, &iscol_local)); 1403 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1404 PetscCall(ISSorted(iscol_local, &issorted)); 1405 PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 1406 } 1407 1408 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1409 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1410 if (call == MAT_INITIAL_MATRIX) { 1411 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1412 PetscCall(ISDestroy(&iscol_local)); 1413 } 1414 PetscFunctionReturn(PETSC_SUCCESS); 1415 } 1416 1417 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1418 { 1419 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1420 1421 PetscFunctionBegin; 1422 PetscCall(MatZeroEntries(l->A)); 1423 PetscCall(MatZeroEntries(l->B)); 1424 PetscFunctionReturn(PETSC_SUCCESS); 1425 } 1426 1427 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1428 { 1429 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1430 Mat A = a->A, B = a->B; 1431 PetscLogDouble isend[5], irecv[5]; 1432 1433 PetscFunctionBegin; 1434 info->block_size = (PetscReal)matin->rmap->bs; 1435 1436 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1437 1438 isend[0] = info->nz_used; 1439 isend[1] = info->nz_allocated; 1440 isend[2] = info->nz_unneeded; 1441 isend[3] = info->memory; 1442 isend[4] = info->mallocs; 1443 1444 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1445 1446 isend[0] += info->nz_used; 1447 isend[1] += info->nz_allocated; 1448 isend[2] += info->nz_unneeded; 1449 isend[3] += info->memory; 1450 isend[4] += info->mallocs; 1451 if (flag == MAT_LOCAL) { 1452 info->nz_used = isend[0]; 1453 info->nz_allocated = isend[1]; 1454 info->nz_unneeded = isend[2]; 1455 info->memory = isend[3]; 1456 info->mallocs = isend[4]; 1457 } else if (flag == MAT_GLOBAL_MAX) { 1458 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1459 1460 info->nz_used = irecv[0]; 1461 info->nz_allocated = irecv[1]; 1462 info->nz_unneeded = irecv[2]; 1463 info->memory = irecv[3]; 1464 info->mallocs = irecv[4]; 1465 } else if (flag == MAT_GLOBAL_SUM) { 1466 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1467 1468 info->nz_used = irecv[0]; 1469 info->nz_allocated = irecv[1]; 1470 info->nz_unneeded = irecv[2]; 1471 info->memory = irecv[3]; 1472 info->mallocs = irecv[4]; 1473 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1474 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1475 info->fill_ratio_needed = 0; 1476 info->factor_mallocs = 0; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1481 { 1482 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1483 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1484 1485 PetscFunctionBegin; 1486 switch (op) { 1487 case MAT_NEW_NONZERO_LOCATIONS: 1488 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1489 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1490 case MAT_KEEP_NONZERO_PATTERN: 1491 case MAT_SUBMAT_SINGLEIS: 1492 case MAT_NEW_NONZERO_LOCATION_ERR: 1493 MatCheckPreallocated(A, 1); 1494 PetscCall(MatSetOption(a->A, op, flg)); 1495 PetscCall(MatSetOption(a->B, op, flg)); 1496 break; 1497 case MAT_ROW_ORIENTED: 1498 MatCheckPreallocated(A, 1); 1499 a->roworiented = flg; 1500 1501 PetscCall(MatSetOption(a->A, op, flg)); 1502 PetscCall(MatSetOption(a->B, op, flg)); 1503 break; 1504 case MAT_FORCE_DIAGONAL_ENTRIES: 1505 case MAT_SORTED_FULL: 1506 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1507 break; 1508 case MAT_IGNORE_OFF_PROC_ENTRIES: 1509 a->donotstash = flg; 1510 break; 1511 case MAT_USE_HASH_TABLE: 1512 a->ht_flag = flg; 1513 break; 1514 case MAT_HERMITIAN: 1515 MatCheckPreallocated(A, 1); 1516 PetscCall(MatSetOption(a->A, op, flg)); 1517 #if defined(PETSC_USE_COMPLEX) 1518 if (flg) { /* need different mat-vec ops */ 1519 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1520 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1521 A->ops->multtranspose = NULL; 1522 A->ops->multtransposeadd = NULL; 1523 A->symmetric = PETSC_BOOL3_FALSE; 1524 } 1525 #endif 1526 break; 1527 case MAT_SPD: 1528 case MAT_SYMMETRIC: 1529 MatCheckPreallocated(A, 1); 1530 PetscCall(MatSetOption(a->A, op, flg)); 1531 #if defined(PETSC_USE_COMPLEX) 1532 if (flg) { /* restore to use default mat-vec ops */ 1533 A->ops->mult = MatMult_MPISBAIJ; 1534 A->ops->multadd = MatMultAdd_MPISBAIJ; 1535 A->ops->multtranspose = MatMult_MPISBAIJ; 1536 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1537 } 1538 #endif 1539 break; 1540 case MAT_STRUCTURALLY_SYMMETRIC: 1541 MatCheckPreallocated(A, 1); 1542 PetscCall(MatSetOption(a->A, op, flg)); 1543 break; 1544 case MAT_SYMMETRY_ETERNAL: 1545 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1546 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1547 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1548 break; 1549 case MAT_SPD_ETERNAL: 1550 break; 1551 case MAT_IGNORE_LOWER_TRIANGULAR: 1552 aA->ignore_ltriangular = flg; 1553 break; 1554 case MAT_ERROR_LOWER_TRIANGULAR: 1555 aA->ignore_ltriangular = flg; 1556 break; 1557 case MAT_GETROW_UPPERTRIANGULAR: 1558 aA->getrow_utriangular = flg; 1559 break; 1560 default: 1561 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1562 } 1563 PetscFunctionReturn(PETSC_SUCCESS); 1564 } 1565 1566 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1567 { 1568 PetscFunctionBegin; 1569 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1570 if (reuse == MAT_INITIAL_MATRIX) { 1571 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1572 } else if (reuse == MAT_REUSE_MATRIX) { 1573 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1574 } 1575 PetscFunctionReturn(PETSC_SUCCESS); 1576 } 1577 1578 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1579 { 1580 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1581 Mat a = baij->A, b = baij->B; 1582 PetscInt nv, m, n; 1583 PetscBool flg; 1584 1585 PetscFunctionBegin; 1586 if (ll != rr) { 1587 PetscCall(VecEqual(ll, rr, &flg)); 1588 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1589 } 1590 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1591 1592 PetscCall(MatGetLocalSize(mat, &m, &n)); 1593 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1594 1595 PetscCall(VecGetLocalSize(rr, &nv)); 1596 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1597 1598 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1599 1600 /* left diagonalscale the off-diagonal part */ 1601 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1602 1603 /* scale the diagonal part */ 1604 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1605 1606 /* right diagonalscale the off-diagonal part */ 1607 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1608 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1609 PetscFunctionReturn(PETSC_SUCCESS); 1610 } 1611 1612 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1613 { 1614 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1615 1616 PetscFunctionBegin; 1617 PetscCall(MatSetUnfactored(a->A)); 1618 PetscFunctionReturn(PETSC_SUCCESS); 1619 } 1620 1621 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1622 1623 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1624 { 1625 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1626 Mat a, b, c, d; 1627 PetscBool flg; 1628 1629 PetscFunctionBegin; 1630 a = matA->A; 1631 b = matA->B; 1632 c = matB->A; 1633 d = matB->B; 1634 1635 PetscCall(MatEqual(a, c, &flg)); 1636 if (flg) PetscCall(MatEqual(b, d, &flg)); 1637 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1638 PetscFunctionReturn(PETSC_SUCCESS); 1639 } 1640 1641 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1642 { 1643 PetscBool isbaij; 1644 1645 PetscFunctionBegin; 1646 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1647 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1648 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1649 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1650 PetscCall(MatGetRowUpperTriangular(A)); 1651 PetscCall(MatCopy_Basic(A, B, str)); 1652 PetscCall(MatRestoreRowUpperTriangular(A)); 1653 } else { 1654 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1655 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1656 1657 PetscCall(MatCopy(a->A, b->A, str)); 1658 PetscCall(MatCopy(a->B, b->B, str)); 1659 } 1660 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1661 PetscFunctionReturn(PETSC_SUCCESS); 1662 } 1663 1664 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1665 { 1666 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1667 PetscBLASInt bnz, one = 1; 1668 Mat_SeqSBAIJ *xa, *ya; 1669 Mat_SeqBAIJ *xb, *yb; 1670 1671 PetscFunctionBegin; 1672 if (str == SAME_NONZERO_PATTERN) { 1673 PetscScalar alpha = a; 1674 xa = (Mat_SeqSBAIJ *)xx->A->data; 1675 ya = (Mat_SeqSBAIJ *)yy->A->data; 1676 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1677 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1678 xb = (Mat_SeqBAIJ *)xx->B->data; 1679 yb = (Mat_SeqBAIJ *)yy->B->data; 1680 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1681 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1682 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1683 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1684 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1685 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1686 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1687 } else { 1688 Mat B; 1689 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1690 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1691 PetscCall(MatGetRowUpperTriangular(X)); 1692 PetscCall(MatGetRowUpperTriangular(Y)); 1693 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1694 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1695 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1696 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1697 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1698 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1699 PetscCall(MatSetType(B, MATMPISBAIJ)); 1700 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1701 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1702 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1703 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1704 PetscCall(MatHeaderMerge(Y, &B)); 1705 PetscCall(PetscFree(nnz_d)); 1706 PetscCall(PetscFree(nnz_o)); 1707 PetscCall(MatRestoreRowUpperTriangular(X)); 1708 PetscCall(MatRestoreRowUpperTriangular(Y)); 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1714 { 1715 PetscInt i; 1716 PetscBool flg; 1717 1718 PetscFunctionBegin; 1719 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1720 for (i = 0; i < n; i++) { 1721 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1722 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1723 } 1724 PetscFunctionReturn(PETSC_SUCCESS); 1725 } 1726 1727 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1728 { 1729 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1730 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1731 1732 PetscFunctionBegin; 1733 if (!Y->preallocated) { 1734 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1735 } else if (!aij->nz) { 1736 PetscInt nonew = aij->nonew; 1737 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1738 aij->nonew = nonew; 1739 } 1740 PetscCall(MatShift_Basic(Y, a)); 1741 PetscFunctionReturn(PETSC_SUCCESS); 1742 } 1743 1744 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1745 { 1746 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1747 1748 PetscFunctionBegin; 1749 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1750 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1751 if (d) { 1752 PetscInt rstart; 1753 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1754 *d += rstart / A->rmap->bs; 1755 } 1756 PetscFunctionReturn(PETSC_SUCCESS); 1757 } 1758 1759 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1760 { 1761 PetscFunctionBegin; 1762 *a = ((Mat_MPISBAIJ *)A->data)->A; 1763 PetscFunctionReturn(PETSC_SUCCESS); 1764 } 1765 1766 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1767 { 1768 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1769 1770 PetscFunctionBegin; 1771 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1772 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1777 MatGetRow_MPISBAIJ, 1778 MatRestoreRow_MPISBAIJ, 1779 MatMult_MPISBAIJ, 1780 /* 4*/ MatMultAdd_MPISBAIJ, 1781 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1782 MatMultAdd_MPISBAIJ, 1783 NULL, 1784 NULL, 1785 NULL, 1786 /* 10*/ NULL, 1787 NULL, 1788 NULL, 1789 MatSOR_MPISBAIJ, 1790 MatTranspose_MPISBAIJ, 1791 /* 15*/ MatGetInfo_MPISBAIJ, 1792 MatEqual_MPISBAIJ, 1793 MatGetDiagonal_MPISBAIJ, 1794 MatDiagonalScale_MPISBAIJ, 1795 MatNorm_MPISBAIJ, 1796 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1797 MatAssemblyEnd_MPISBAIJ, 1798 MatSetOption_MPISBAIJ, 1799 MatZeroEntries_MPISBAIJ, 1800 /* 24*/ NULL, 1801 NULL, 1802 NULL, 1803 NULL, 1804 NULL, 1805 /* 29*/ MatSetUp_MPI_Hash, 1806 NULL, 1807 NULL, 1808 MatGetDiagonalBlock_MPISBAIJ, 1809 NULL, 1810 /* 34*/ MatDuplicate_MPISBAIJ, 1811 NULL, 1812 NULL, 1813 NULL, 1814 NULL, 1815 /* 39*/ MatAXPY_MPISBAIJ, 1816 MatCreateSubMatrices_MPISBAIJ, 1817 MatIncreaseOverlap_MPISBAIJ, 1818 MatGetValues_MPISBAIJ, 1819 MatCopy_MPISBAIJ, 1820 /* 44*/ NULL, 1821 MatScale_MPISBAIJ, 1822 MatShift_MPISBAIJ, 1823 NULL, 1824 NULL, 1825 /* 49*/ NULL, 1826 NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 /* 54*/ NULL, 1831 NULL, 1832 MatSetUnfactored_MPISBAIJ, 1833 NULL, 1834 MatSetValuesBlocked_MPISBAIJ, 1835 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1836 NULL, 1837 NULL, 1838 NULL, 1839 NULL, 1840 /* 64*/ NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 NULL, 1845 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1846 NULL, 1847 MatConvert_MPISBAIJ_Basic, 1848 NULL, 1849 NULL, 1850 /* 74*/ NULL, 1851 NULL, 1852 NULL, 1853 NULL, 1854 NULL, 1855 /* 79*/ NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 MatLoad_MPISBAIJ, 1860 /* 84*/ NULL, 1861 NULL, 1862 NULL, 1863 NULL, 1864 NULL, 1865 /* 89*/ NULL, 1866 NULL, 1867 NULL, 1868 NULL, 1869 NULL, 1870 /* 94*/ NULL, 1871 NULL, 1872 NULL, 1873 NULL, 1874 NULL, 1875 /* 99*/ NULL, 1876 NULL, 1877 NULL, 1878 MatConjugate_MPISBAIJ, 1879 NULL, 1880 /*104*/ NULL, 1881 MatRealPart_MPISBAIJ, 1882 MatImaginaryPart_MPISBAIJ, 1883 MatGetRowUpperTriangular_MPISBAIJ, 1884 MatRestoreRowUpperTriangular_MPISBAIJ, 1885 /*109*/ NULL, 1886 NULL, 1887 NULL, 1888 NULL, 1889 MatMissingDiagonal_MPISBAIJ, 1890 /*114*/ NULL, 1891 NULL, 1892 NULL, 1893 NULL, 1894 NULL, 1895 /*119*/ NULL, 1896 NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 /*124*/ NULL, 1901 NULL, 1902 NULL, 1903 NULL, 1904 NULL, 1905 /*129*/ NULL, 1906 NULL, 1907 NULL, 1908 NULL, 1909 NULL, 1910 /*134*/ NULL, 1911 NULL, 1912 NULL, 1913 NULL, 1914 NULL, 1915 /*139*/ MatSetBlockSizes_Default, 1916 NULL, 1917 NULL, 1918 NULL, 1919 NULL, 1920 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1921 NULL, 1922 NULL, 1923 NULL, 1924 NULL, 1925 NULL, 1926 /*150*/ NULL, 1927 MatEliminateZeros_MPISBAIJ}; 1928 1929 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1930 { 1931 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1932 PetscInt i, mbs, Mbs; 1933 PetscMPIInt size; 1934 1935 PetscFunctionBegin; 1936 if (B->hash_active) { 1937 B->ops[0] = b->cops; 1938 B->hash_active = PETSC_FALSE; 1939 } 1940 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1941 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1942 PetscCall(PetscLayoutSetUp(B->rmap)); 1943 PetscCall(PetscLayoutSetUp(B->cmap)); 1944 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1945 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1946 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1947 1948 mbs = B->rmap->n / bs; 1949 Mbs = B->rmap->N / bs; 1950 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1951 1952 B->rmap->bs = bs; 1953 b->bs2 = bs * bs; 1954 b->mbs = mbs; 1955 b->Mbs = Mbs; 1956 b->nbs = B->cmap->n / bs; 1957 b->Nbs = B->cmap->N / bs; 1958 1959 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1960 b->rstartbs = B->rmap->rstart / bs; 1961 b->rendbs = B->rmap->rend / bs; 1962 1963 b->cstartbs = B->cmap->rstart / bs; 1964 b->cendbs = B->cmap->rend / bs; 1965 1966 #if defined(PETSC_USE_CTABLE) 1967 PetscCall(PetscHMapIDestroy(&b->colmap)); 1968 #else 1969 PetscCall(PetscFree(b->colmap)); 1970 #endif 1971 PetscCall(PetscFree(b->garray)); 1972 PetscCall(VecDestroy(&b->lvec)); 1973 PetscCall(VecScatterDestroy(&b->Mvctx)); 1974 PetscCall(VecDestroy(&b->slvec0)); 1975 PetscCall(VecDestroy(&b->slvec0b)); 1976 PetscCall(VecDestroy(&b->slvec1)); 1977 PetscCall(VecDestroy(&b->slvec1a)); 1978 PetscCall(VecDestroy(&b->slvec1b)); 1979 PetscCall(VecScatterDestroy(&b->sMvctx)); 1980 1981 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1982 PetscCall(MatDestroy(&b->B)); 1983 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1984 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1985 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1986 1987 PetscCall(MatDestroy(&b->A)); 1988 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1989 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1990 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1991 1992 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1993 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1994 1995 B->preallocated = PETSC_TRUE; 1996 B->was_assembled = PETSC_FALSE; 1997 B->assembled = PETSC_FALSE; 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2002 { 2003 PetscInt m, rstart, cend; 2004 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2005 const PetscInt *JJ = NULL; 2006 PetscScalar *values = NULL; 2007 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2008 PetscBool nooffprocentries; 2009 2010 PetscFunctionBegin; 2011 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2012 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2013 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2014 PetscCall(PetscLayoutSetUp(B->rmap)); 2015 PetscCall(PetscLayoutSetUp(B->cmap)); 2016 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2017 m = B->rmap->n / bs; 2018 rstart = B->rmap->rstart / bs; 2019 cend = B->cmap->rend / bs; 2020 2021 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2022 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2023 for (i = 0; i < m; i++) { 2024 nz = ii[i + 1] - ii[i]; 2025 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2026 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2027 JJ = jj + ii[i]; 2028 bd = 0; 2029 for (j = 0; j < nz; j++) { 2030 if (*JJ >= i + rstart) break; 2031 JJ++; 2032 bd++; 2033 } 2034 d = 0; 2035 for (; j < nz; j++) { 2036 if (*JJ++ >= cend) break; 2037 d++; 2038 } 2039 d_nnz[i] = d; 2040 o_nnz[i] = nz - d - bd; 2041 nz = nz - bd; 2042 nz_max = PetscMax(nz_max, nz); 2043 } 2044 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2045 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2046 PetscCall(PetscFree2(d_nnz, o_nnz)); 2047 2048 values = (PetscScalar *)V; 2049 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2050 for (i = 0; i < m; i++) { 2051 PetscInt row = i + rstart; 2052 PetscInt ncols = ii[i + 1] - ii[i]; 2053 const PetscInt *icols = jj + ii[i]; 2054 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2055 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2056 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2057 } else { /* block ordering does not match so we can only insert one block at a time. */ 2058 PetscInt j; 2059 for (j = 0; j < ncols; j++) { 2060 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2061 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2062 } 2063 } 2064 } 2065 2066 if (!V) PetscCall(PetscFree(values)); 2067 nooffprocentries = B->nooffprocentries; 2068 B->nooffprocentries = PETSC_TRUE; 2069 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2070 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2071 B->nooffprocentries = nooffprocentries; 2072 2073 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2074 PetscFunctionReturn(PETSC_SUCCESS); 2075 } 2076 2077 /*MC 2078 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2079 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2080 the matrix is stored. 2081 2082 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2083 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2084 2085 Options Database Key: 2086 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2087 2088 Level: beginner 2089 2090 Note: 2091 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2092 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2093 2094 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2095 M*/ 2096 2097 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2098 { 2099 Mat_MPISBAIJ *b; 2100 PetscBool flg = PETSC_FALSE; 2101 2102 PetscFunctionBegin; 2103 PetscCall(PetscNew(&b)); 2104 B->data = (void *)b; 2105 B->ops[0] = MatOps_Values; 2106 2107 B->ops->destroy = MatDestroy_MPISBAIJ; 2108 B->ops->view = MatView_MPISBAIJ; 2109 B->assembled = PETSC_FALSE; 2110 B->insertmode = NOT_SET_VALUES; 2111 2112 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2113 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2114 2115 /* build local table of row and column ownerships */ 2116 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2117 2118 /* build cache for off array entries formed */ 2119 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2120 2121 b->donotstash = PETSC_FALSE; 2122 b->colmap = NULL; 2123 b->garray = NULL; 2124 b->roworiented = PETSC_TRUE; 2125 2126 /* stuff used in block assembly */ 2127 b->barray = NULL; 2128 2129 /* stuff used for matrix vector multiply */ 2130 b->lvec = NULL; 2131 b->Mvctx = NULL; 2132 b->slvec0 = NULL; 2133 b->slvec0b = NULL; 2134 b->slvec1 = NULL; 2135 b->slvec1a = NULL; 2136 b->slvec1b = NULL; 2137 b->sMvctx = NULL; 2138 2139 /* stuff for MatGetRow() */ 2140 b->rowindices = NULL; 2141 b->rowvalues = NULL; 2142 b->getrowactive = PETSC_FALSE; 2143 2144 /* hash table stuff */ 2145 b->ht = NULL; 2146 b->hd = NULL; 2147 b->ht_size = 0; 2148 b->ht_flag = PETSC_FALSE; 2149 b->ht_fact = 0; 2150 b->ht_total_ct = 0; 2151 b->ht_insert_ct = 0; 2152 2153 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2154 b->ijonly = PETSC_FALSE; 2155 2156 b->in_loc = NULL; 2157 b->v_loc = NULL; 2158 b->n_loc = 0; 2159 2160 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2161 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2162 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2163 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2164 #if defined(PETSC_HAVE_ELEMENTAL) 2165 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2166 #endif 2167 #if defined(PETSC_HAVE_SCALAPACK) 2168 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2169 #endif 2170 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2171 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2172 2173 B->symmetric = PETSC_BOOL3_TRUE; 2174 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2175 B->symmetry_eternal = PETSC_TRUE; 2176 B->structural_symmetry_eternal = PETSC_TRUE; 2177 #if defined(PETSC_USE_COMPLEX) 2178 B->hermitian = PETSC_BOOL3_FALSE; 2179 #else 2180 B->hermitian = PETSC_BOOL3_TRUE; 2181 #endif 2182 2183 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2184 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2185 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2186 if (flg) { 2187 PetscReal fact = 1.39; 2188 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2189 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2190 if (fact <= 1.0) fact = 1.39; 2191 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2192 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2193 } 2194 PetscOptionsEnd(); 2195 PetscFunctionReturn(PETSC_SUCCESS); 2196 } 2197 2198 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2199 /*MC 2200 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2201 2202 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2203 and `MATMPISBAIJ` otherwise. 2204 2205 Options Database Key: 2206 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2207 2208 Level: beginner 2209 2210 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2211 M*/ 2212 2213 /*@C 2214 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2215 the user should preallocate the matrix storage by setting the parameters 2216 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2217 performance can be increased by more than a factor of 50. 2218 2219 Collective 2220 2221 Input Parameters: 2222 + B - the matrix 2223 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2224 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2225 . d_nz - number of block nonzeros per block row in diagonal portion of local 2226 submatrix (same for all local rows) 2227 . d_nnz - array containing the number of block nonzeros in the various block rows 2228 in the upper triangular and diagonal part of the in diagonal portion of the local 2229 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2230 for the diagonal entry and set a value even if it is zero. 2231 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2232 submatrix (same for all local rows). 2233 - o_nnz - array containing the number of nonzeros in the various block rows of the 2234 off-diagonal portion of the local submatrix that is right of the diagonal 2235 (possibly different for each block row) or `NULL`. 2236 2237 Options Database Keys: 2238 + -mat_no_unroll - uses code that does not unroll the loops in the 2239 block calculations (much slower) 2240 - -mat_block_size - size of the blocks to use 2241 2242 Level: intermediate 2243 2244 Notes: 2245 2246 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2247 than it must be used on all processors that share the object for that argument. 2248 2249 If the *_nnz parameter is given then the *_nz parameter is ignored 2250 2251 Storage Information: 2252 For a square global matrix we define each processor's diagonal portion 2253 to be its local rows and the corresponding columns (a square submatrix); 2254 each processor's off-diagonal portion encompasses the remainder of the 2255 local matrix (a rectangular submatrix). 2256 2257 The user can specify preallocated storage for the diagonal part of 2258 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2259 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2260 memory allocation. Likewise, specify preallocated storage for the 2261 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2262 2263 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2264 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2265 You can also run with the option `-info` and look for messages with the string 2266 malloc in them to see if additional memory allocation was needed. 2267 2268 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2269 the figure below we depict these three local rows and all columns (0-11). 2270 2271 .vb 2272 0 1 2 3 4 5 6 7 8 9 10 11 2273 -------------------------- 2274 row 3 |. . . d d d o o o o o o 2275 row 4 |. . . d d d o o o o o o 2276 row 5 |. . . d d d o o o o o o 2277 -------------------------- 2278 .ve 2279 2280 Thus, any entries in the d locations are stored in the d (diagonal) 2281 submatrix, and any entries in the o locations are stored in the 2282 o (off-diagonal) submatrix. Note that the d matrix is stored in 2283 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2284 2285 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2286 plus the diagonal part of the d matrix, 2287 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2288 2289 In general, for PDE problems in which most nonzeros are near the diagonal, 2290 one expects `d_nz` >> `o_nz`. 2291 2292 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2293 @*/ 2294 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2295 { 2296 PetscFunctionBegin; 2297 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2298 PetscValidType(B, 1); 2299 PetscValidLogicalCollectiveInt(B, bs, 2); 2300 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2301 PetscFunctionReturn(PETSC_SUCCESS); 2302 } 2303 2304 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2305 /*@C 2306 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2307 (block compressed row). For good matrix assembly performance 2308 the user should preallocate the matrix storage by setting the parameters 2309 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2310 2311 Collective 2312 2313 Input Parameters: 2314 + comm - MPI communicator 2315 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2316 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2317 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2318 This value should be the same as the local size used in creating the 2319 y vector for the matrix-vector product y = Ax. 2320 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2321 This value should be the same as the local size used in creating the 2322 x vector for the matrix-vector product y = Ax. 2323 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2324 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2325 . d_nz - number of block nonzeros per block row in diagonal portion of local 2326 submatrix (same for all local rows) 2327 . d_nnz - array containing the number of block nonzeros in the various block rows 2328 in the upper triangular portion of the in diagonal portion of the local 2329 (possibly different for each block block row) or `NULL`. 2330 If you plan to factor the matrix you must leave room for the diagonal entry and 2331 set its value even if it is zero. 2332 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2333 submatrix (same for all local rows). 2334 - o_nnz - array containing the number of nonzeros in the various block rows of the 2335 off-diagonal portion of the local submatrix (possibly different for 2336 each block row) or `NULL`. 2337 2338 Output Parameter: 2339 . A - the matrix 2340 2341 Options Database Keys: 2342 + -mat_no_unroll - uses code that does not unroll the loops in the 2343 block calculations (much slower) 2344 . -mat_block_size - size of the blocks to use 2345 - -mat_mpi - use the parallel matrix data structures even on one processor 2346 (defaults to using SeqBAIJ format on one processor) 2347 2348 Level: intermediate 2349 2350 Notes: 2351 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2352 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2353 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2354 2355 The number of rows and columns must be divisible by blocksize. 2356 This matrix type does not support complex Hermitian operation. 2357 2358 The user MUST specify either the local or global matrix dimensions 2359 (possibly both). 2360 2361 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2362 than it must be used on all processors that share the object for that argument. 2363 2364 If the *_nnz parameter is given then the *_nz parameter is ignored 2365 2366 Storage Information: 2367 For a square global matrix we define each processor's diagonal portion 2368 to be its local rows and the corresponding columns (a square submatrix); 2369 each processor's off-diagonal portion encompasses the remainder of the 2370 local matrix (a rectangular submatrix). 2371 2372 The user can specify preallocated storage for the diagonal part of 2373 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2374 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2375 memory allocation. Likewise, specify preallocated storage for the 2376 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2377 2378 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2379 the figure below we depict these three local rows and all columns (0-11). 2380 2381 .vb 2382 0 1 2 3 4 5 6 7 8 9 10 11 2383 -------------------------- 2384 row 3 |. . . d d d o o o o o o 2385 row 4 |. . . d d d o o o o o o 2386 row 5 |. . . d d d o o o o o o 2387 -------------------------- 2388 .ve 2389 2390 Thus, any entries in the d locations are stored in the d (diagonal) 2391 submatrix, and any entries in the o locations are stored in the 2392 o (off-diagonal) submatrix. Note that the d matrix is stored in 2393 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2394 2395 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2396 plus the diagonal part of the d matrix, 2397 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2398 In general, for PDE problems in which most nonzeros are near the diagonal, 2399 one expects `d_nz` >> `o_nz`. 2400 2401 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2402 @*/ 2403 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2404 { 2405 PetscMPIInt size; 2406 2407 PetscFunctionBegin; 2408 PetscCall(MatCreate(comm, A)); 2409 PetscCall(MatSetSizes(*A, m, n, M, N)); 2410 PetscCallMPI(MPI_Comm_size(comm, &size)); 2411 if (size > 1) { 2412 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2413 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2414 } else { 2415 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2416 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2417 } 2418 PetscFunctionReturn(PETSC_SUCCESS); 2419 } 2420 2421 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2422 { 2423 Mat mat; 2424 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2425 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2426 PetscScalar *array; 2427 2428 PetscFunctionBegin; 2429 *newmat = NULL; 2430 2431 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2432 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2433 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2434 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2435 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2436 2437 mat->factortype = matin->factortype; 2438 mat->preallocated = PETSC_TRUE; 2439 mat->assembled = PETSC_TRUE; 2440 mat->insertmode = NOT_SET_VALUES; 2441 2442 a = (Mat_MPISBAIJ *)mat->data; 2443 a->bs2 = oldmat->bs2; 2444 a->mbs = oldmat->mbs; 2445 a->nbs = oldmat->nbs; 2446 a->Mbs = oldmat->Mbs; 2447 a->Nbs = oldmat->Nbs; 2448 2449 a->size = oldmat->size; 2450 a->rank = oldmat->rank; 2451 a->donotstash = oldmat->donotstash; 2452 a->roworiented = oldmat->roworiented; 2453 a->rowindices = NULL; 2454 a->rowvalues = NULL; 2455 a->getrowactive = PETSC_FALSE; 2456 a->barray = NULL; 2457 a->rstartbs = oldmat->rstartbs; 2458 a->rendbs = oldmat->rendbs; 2459 a->cstartbs = oldmat->cstartbs; 2460 a->cendbs = oldmat->cendbs; 2461 2462 /* hash table stuff */ 2463 a->ht = NULL; 2464 a->hd = NULL; 2465 a->ht_size = 0; 2466 a->ht_flag = oldmat->ht_flag; 2467 a->ht_fact = oldmat->ht_fact; 2468 a->ht_total_ct = 0; 2469 a->ht_insert_ct = 0; 2470 2471 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2472 if (oldmat->colmap) { 2473 #if defined(PETSC_USE_CTABLE) 2474 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2475 #else 2476 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2477 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2478 #endif 2479 } else a->colmap = NULL; 2480 2481 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 2482 PetscCall(PetscMalloc1(len, &a->garray)); 2483 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2484 } else a->garray = NULL; 2485 2486 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2487 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2488 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2489 2490 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2491 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2492 2493 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2494 PetscCall(VecGetArray(a->slvec1, &array)); 2495 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2496 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2497 PetscCall(VecRestoreArray(a->slvec1, &array)); 2498 PetscCall(VecGetArray(a->slvec0, &array)); 2499 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2500 PetscCall(VecRestoreArray(a->slvec0, &array)); 2501 2502 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2503 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2504 a->sMvctx = oldmat->sMvctx; 2505 2506 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2507 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2508 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2509 *newmat = mat; 2510 PetscFunctionReturn(PETSC_SUCCESS); 2511 } 2512 2513 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2514 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2515 2516 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2517 { 2518 PetscBool isbinary; 2519 2520 PetscFunctionBegin; 2521 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2522 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2523 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2524 PetscFunctionReturn(PETSC_SUCCESS); 2525 } 2526 2527 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2528 { 2529 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2530 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2531 PetscReal atmp; 2532 PetscReal *work, *svalues, *rvalues; 2533 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2534 PetscMPIInt rank, size; 2535 PetscInt *rowners_bs, dest, count, source; 2536 PetscScalar *va; 2537 MatScalar *ba; 2538 MPI_Status stat; 2539 2540 PetscFunctionBegin; 2541 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2542 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2543 PetscCall(VecGetArray(v, &va)); 2544 2545 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2546 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2547 2548 bs = A->rmap->bs; 2549 mbs = a->mbs; 2550 Mbs = a->Mbs; 2551 ba = b->a; 2552 bi = b->i; 2553 bj = b->j; 2554 2555 /* find ownerships */ 2556 rowners_bs = A->rmap->range; 2557 2558 /* each proc creates an array to be distributed */ 2559 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2560 2561 /* row_max for B */ 2562 if (rank != size - 1) { 2563 for (i = 0; i < mbs; i++) { 2564 ncols = bi[1] - bi[0]; 2565 bi++; 2566 brow = bs * i; 2567 for (j = 0; j < ncols; j++) { 2568 bcol = bs * (*bj); 2569 for (kcol = 0; kcol < bs; kcol++) { 2570 col = bcol + kcol; /* local col index */ 2571 col += rowners_bs[rank + 1]; /* global col index */ 2572 for (krow = 0; krow < bs; krow++) { 2573 atmp = PetscAbsScalar(*ba); 2574 ba++; 2575 row = brow + krow; /* local row index */ 2576 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2577 if (work[col] < atmp) work[col] = atmp; 2578 } 2579 } 2580 bj++; 2581 } 2582 } 2583 2584 /* send values to its owners */ 2585 for (dest = rank + 1; dest < size; dest++) { 2586 svalues = work + rowners_bs[dest]; 2587 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2588 PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2589 } 2590 } 2591 2592 /* receive values */ 2593 if (rank) { 2594 rvalues = work; 2595 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2596 for (source = 0; source < rank; source++) { 2597 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2598 /* process values */ 2599 for (i = 0; i < count; i++) { 2600 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2601 } 2602 } 2603 } 2604 2605 PetscCall(VecRestoreArray(v, &va)); 2606 PetscCall(PetscFree(work)); 2607 PetscFunctionReturn(PETSC_SUCCESS); 2608 } 2609 2610 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2611 { 2612 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2613 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2614 PetscScalar *x, *ptr, *from; 2615 Vec bb1; 2616 const PetscScalar *b; 2617 2618 PetscFunctionBegin; 2619 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2620 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2621 2622 if (flag == SOR_APPLY_UPPER) { 2623 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2624 PetscFunctionReturn(PETSC_SUCCESS); 2625 } 2626 2627 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2628 if (flag & SOR_ZERO_INITIAL_GUESS) { 2629 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2630 its--; 2631 } 2632 2633 PetscCall(VecDuplicate(bb, &bb1)); 2634 while (its--) { 2635 /* lower triangular part: slvec0b = - B^T*xx */ 2636 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2637 2638 /* copy xx into slvec0a */ 2639 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2640 PetscCall(VecGetArray(xx, &x)); 2641 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2642 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2643 2644 PetscCall(VecScale(mat->slvec0, -1.0)); 2645 2646 /* copy bb into slvec1a */ 2647 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2648 PetscCall(VecGetArrayRead(bb, &b)); 2649 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2650 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2651 2652 /* set slvec1b = 0 */ 2653 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2654 PetscCall(VecZeroEntries(mat->slvec1b)); 2655 2656 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2657 PetscCall(VecRestoreArray(xx, &x)); 2658 PetscCall(VecRestoreArrayRead(bb, &b)); 2659 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2660 2661 /* upper triangular part: bb1 = bb1 - B*x */ 2662 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2663 2664 /* local diagonal sweep */ 2665 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2666 } 2667 PetscCall(VecDestroy(&bb1)); 2668 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2669 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2670 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2671 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2672 } else if (flag & SOR_EISENSTAT) { 2673 Vec xx1; 2674 PetscBool hasop; 2675 const PetscScalar *diag; 2676 PetscScalar *sl, scale = (omega - 2.0) / omega; 2677 PetscInt i, n; 2678 2679 if (!mat->xx1) { 2680 PetscCall(VecDuplicate(bb, &mat->xx1)); 2681 PetscCall(VecDuplicate(bb, &mat->bb1)); 2682 } 2683 xx1 = mat->xx1; 2684 bb1 = mat->bb1; 2685 2686 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2687 2688 if (!mat->diag) { 2689 /* this is wrong for same matrix with new nonzero values */ 2690 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2691 PetscCall(MatGetDiagonal(matin, mat->diag)); 2692 } 2693 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2694 2695 if (hasop) { 2696 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2697 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2698 } else { 2699 /* 2700 These two lines are replaced by code that may be a bit faster for a good compiler 2701 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2702 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2703 */ 2704 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2705 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2706 PetscCall(VecGetArrayRead(bb, &b)); 2707 PetscCall(VecGetArray(xx, &x)); 2708 PetscCall(VecGetLocalSize(xx, &n)); 2709 if (omega == 1.0) { 2710 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2711 PetscCall(PetscLogFlops(2.0 * n)); 2712 } else { 2713 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2714 PetscCall(PetscLogFlops(3.0 * n)); 2715 } 2716 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2717 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2718 PetscCall(VecRestoreArrayRead(bb, &b)); 2719 PetscCall(VecRestoreArray(xx, &x)); 2720 } 2721 2722 /* multiply off-diagonal portion of matrix */ 2723 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2724 PetscCall(VecZeroEntries(mat->slvec1b)); 2725 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2726 PetscCall(VecGetArray(mat->slvec0, &from)); 2727 PetscCall(VecGetArray(xx, &x)); 2728 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2729 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2730 PetscCall(VecRestoreArray(xx, &x)); 2731 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2732 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2733 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2734 2735 /* local sweep */ 2736 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2737 PetscCall(VecAXPY(xx, 1.0, xx1)); 2738 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2739 PetscFunctionReturn(PETSC_SUCCESS); 2740 } 2741 2742 /*@ 2743 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2744 CSR format the local rows. 2745 2746 Collective 2747 2748 Input Parameters: 2749 + comm - MPI communicator 2750 . bs - the block size, only a block size of 1 is supported 2751 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2752 . n - This value should be the same as the local size used in creating the 2753 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2754 calculated if `N` is given) For square matrices `n` is almost always `m`. 2755 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2756 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2757 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2758 . j - column indices 2759 - a - matrix values 2760 2761 Output Parameter: 2762 . mat - the matrix 2763 2764 Level: intermediate 2765 2766 Notes: 2767 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2768 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2769 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2770 2771 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2772 2773 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2774 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2775 @*/ 2776 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2777 { 2778 PetscFunctionBegin; 2779 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2780 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2781 PetscCall(MatCreate(comm, mat)); 2782 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2783 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2784 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2785 PetscFunctionReturn(PETSC_SUCCESS); 2786 } 2787 2788 /*@C 2789 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2790 2791 Collective 2792 2793 Input Parameters: 2794 + B - the matrix 2795 . bs - the block size 2796 . i - the indices into `j` for the start of each local row (starts with zero) 2797 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2798 - v - optional values in the matrix 2799 2800 Level: advanced 2801 2802 Notes: 2803 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2804 and usually the numerical values as well 2805 2806 Any entries below the diagonal are ignored 2807 2808 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2809 @*/ 2810 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2811 { 2812 PetscFunctionBegin; 2813 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2814 PetscFunctionReturn(PETSC_SUCCESS); 2815 } 2816 2817 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2818 { 2819 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2820 PetscInt *indx; 2821 PetscScalar *values; 2822 2823 PetscFunctionBegin; 2824 PetscCall(MatGetSize(inmat, &m, &N)); 2825 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2826 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2827 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2828 PetscInt *bindx, rmax = a->rmax, j; 2829 PetscMPIInt rank, size; 2830 2831 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2832 mbs = m / bs; 2833 Nbs = N / cbs; 2834 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2835 nbs = n / cbs; 2836 2837 PetscCall(PetscMalloc1(rmax, &bindx)); 2838 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2839 2840 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2841 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2842 if (rank == size - 1) { 2843 /* Check sum(nbs) = Nbs */ 2844 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2845 } 2846 2847 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2848 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2849 for (i = 0; i < mbs; i++) { 2850 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2851 nnz = nnz / bs; 2852 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2853 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2854 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2855 } 2856 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2857 PetscCall(PetscFree(bindx)); 2858 2859 PetscCall(MatCreate(comm, outmat)); 2860 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2861 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2862 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2863 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2864 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2865 MatPreallocateEnd(dnz, onz); 2866 } 2867 2868 /* numeric phase */ 2869 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2870 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2871 2872 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2873 for (i = 0; i < m; i++) { 2874 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2875 Ii = i + rstart; 2876 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2877 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2878 } 2879 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2880 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2881 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884