#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

/*
  MatDestroy_MPISBAIJ - releases everything owned by an MPISBAIJ matrix:
  the two stashes, the on-diagonal (A) and off-diagonal (B) sequential blocks,
  the column map, scatter contexts and work vectors, and finally the data
  structure itself, then clears the type name and all composed functions.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));  /* scalar-indexed stash */
  PetscCall(MatStashDestroy_Private(&mat->bstash)); /* block-indexed stash */
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap is a hash table or a dense array depending on the build option */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  /* slvec0/slvec1 family: work vectors used by the symmetric mat-vec product */
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
/*
  MatPreallocateWithMats_Private - preallocates B using the nonzero patterns of
  the nm matrices X[] (which must share B's row layout). If symm[i] is true the
  transpose pattern of X[i] is inserted as well. Uses a MATPREALLOCATOR proxy.
  (Continues on the following lines.)
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  /* Build a preallocator matrix with B's exact layout and replay all insertions
     into it; it records the pattern without storing values. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      /* for symmetric sources also record the transposed entries */
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  /* Transfer the recorded pattern (and optionally zero-fill) into B */
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvert_MPISBAIJ_Basic - generic fallback conversion from MPISBAIJ to
  another type: expands the stored upper triangle into a full (symmetric or
  Hermitian) matrix B row by row. (Continues on the following lines.)
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    /* dense targets need no preallocation, only MatSetUp() */
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  /* Copy each stored (upper-triangular) row into B, mirroring it below the
     diagonal so B holds the full matrix. */
  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      /* Hermitian: mirror with the conjugate values */
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stash away a copy of the current values of both sequential blocks */
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the values previously saved by MatStoreValues_MPISBAIJ() */
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Insert/add a single scalar entry into the on-diagonal (SBAIJ) block.
  Relies on locals declared in the caller (rp, ap, rmax, nrow, low, high, t,
  _i, N, brow, bcol, ridx, cidx, bap, bs, bs2, a, aimax, ai, ailen, aj, aa, A).
  Binary-search + linear-scan for the block column; reallocates and shifts the
  row if a new block must be inserted.
*/
#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

/* Same as the A variant above but targets the off-diagonal (BAIJ) block B */
#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij        = (Mat_MPISBAIJ *)mat->data;
  MatScalar     value;
  PetscBool     roworiented = baij->roworiented;
  PetscInt      i, j, row, col;
  PetscInt      rstart_orig = mat->rmap->rstart;
  PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat           A     = baij->A;
  Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
  PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar    *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt   n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  /* Grow the scratch buffers used for filtered off-process rows if needed */
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        /* entries strictly in the lower block triangle are ignored or rejected */
        if (im[i] / bs > in[j] / bs) {
          if (a->ignore_ltriangular) {
            continue; /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* translate the global column through colmap (1-based entries, 0 == absent) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
              /* column not in B's compressed pattern: fall back to global indexing */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* filter out lower-triangular entries before stashing the row */
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Insert/add one bs x bs block (block indices row/col) into the sequential
  SBAIJ block; rejects or ignores lower-triangular blocks.
  (Continues on the following lines.)
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt     *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt     *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt     *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool     roworiented =
a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search narrows to <=7 candidates, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      /* block storage is column-major: row-oriented input strides by bs */
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF,
PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ    *baij        = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block used for reordering copies */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

            /* debug builds verify that colmap entries are block aligned */
#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              /* unknown off-diagonal block column: revert B to global indexing */
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetValues_MPISBAIJ - fetches entries that live on this process, reading
  the diagonal block A directly and translating off-diagonal columns through
  colmap/garray for B. Off-process rows are not supported.
  (Continues on the following lines.)
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          /* entry not stored in the off-diagonal pattern: report explicit zero */
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatNorm_MPISBAIJ - norms for the symmetric format. Because only the upper
  triangle is stored, off-diagonal contributions are counted twice:
  Frobenius uses sqrt(|A|_F^2 + 2 |B|_F^2); NORM_1/NORM_INFINITY accumulate
  each stored block into both its row and column sums.
  (Continues on the following lines.)
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal     sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* square of norm(A) */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* square of norm(B) */
      PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      /* B's entries appear twice in the full symmetric matrix */
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal    *rsum, *rsum2, vabs;
      PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
MatScalar *v; 731 732 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 733 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 734 /* Amat */ 735 v = amat->a; 736 jj = amat->j; 737 for (brow = 0; brow < mbs; brow++) { 738 grow = bs * (rstart + brow); 739 nz = amat->i[brow + 1] - amat->i[brow]; 740 for (bcol = 0; bcol < nz; bcol++) { 741 gcol = bs * (rstart + *jj); 742 jj++; 743 for (col = 0; col < bs; col++) { 744 for (row = 0; row < bs; row++) { 745 vabs = PetscAbsScalar(*v); 746 v++; 747 rsum[gcol + col] += vabs; 748 /* non-diagonal block */ 749 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 750 } 751 } 752 } 753 PetscCall(PetscLogFlops(nz * bs * bs)); 754 } 755 /* Bmat */ 756 v = bmat->a; 757 jj = bmat->j; 758 for (brow = 0; brow < mbs; brow++) { 759 grow = bs * (rstart + brow); 760 nz = bmat->i[brow + 1] - bmat->i[brow]; 761 for (bcol = 0; bcol < nz; bcol++) { 762 gcol = bs * garray[*jj]; 763 jj++; 764 for (col = 0; col < bs; col++) { 765 for (row = 0; row < bs; row++) { 766 vabs = PetscAbsScalar(*v); 767 v++; 768 rsum[gcol + col] += vabs; 769 rsum[grow + row] += vabs; 770 } 771 } 772 } 773 PetscCall(PetscLogFlops(nz * bs * bs)); 774 } 775 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 776 *norm = 0.0; 777 for (col = 0; col < mat->cmap->N; col++) { 778 if (rsum2[col] > *norm) *norm = rsum2[col]; 779 } 780 PetscCall(PetscFree2(rsum, rsum2)); 781 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 782 } 783 PetscFunctionReturn(PETSC_SUCCESS); 784 } 785 786 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 787 { 788 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 789 PetscInt nstash, reallocs; 790 791 PetscFunctionBegin; 792 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 793 794 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 795 
PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 796 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 797 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 798 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 799 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 800 PetscFunctionReturn(PETSC_SUCCESS); 801 } 802 803 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 804 { 805 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 806 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 807 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 808 PetscInt *row, *col; 809 PetscBool other_disassembled; 810 PetscMPIInt n; 811 PetscBool r1, r2, r3; 812 MatScalar *val; 813 814 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 815 PetscFunctionBegin; 816 if (!baij->donotstash && !mat->nooffprocentries) { 817 while (1) { 818 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 819 if (!flg) break; 820 821 for (i = 0; i < n;) { 822 /* Now identify the consecutive vals belonging to the same row */ 823 for (j = i, rstart = row[j]; j < n; j++) { 824 if (row[j] != rstart) break; 825 } 826 if (j < n) ncols = j - i; 827 else ncols = n - i; 828 /* Now assemble all these values with a single function call */ 829 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 830 i = j; 831 } 832 } 833 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 834 /* Now process the block-stash. 
       Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
/*
  MatView_MPISBAIJ_ASCIIorDraworSocket - viewer dispatch for ASCII info
  formats, draw, and socket output. (Continues past this chunk.)
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format ==
PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      /* per-rank local sizes and nonzero counts, printed in rank order */
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for a non-factored SBAIJ matrix */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat           A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block at a time (values are column-oriented) */
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps B's compressed block-column index back to the global block column */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
      /* on rank 0 the gathered matrix's diagonal block holds the whole matrix */
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

/* Top-level viewer dispatch: routes to the ASCII/draw/socket viewer or the binary viewer */
PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A*x for a Hermitian MPISBAIJ matrix: only the upper triangle is stored,
   so the lower-triangle contribution comes from B^H applied via a scatter */
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s",
((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  /* single combined scatter accumulates both local x and B^H x contributions */
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A*x for a symmetric MPISBAIJ matrix: the stored upper triangle supplies
   the diagonal part, B^T supplies the below-diagonal contribution */
PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x, Hermitian variant (see MatMult_MPISBAIJ_Hermitian) */
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x, symmetric variant (see MatMult_MPISBAIJ) */
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scales both the diagonal (A) and off-diagonal (B) blocks by aa */
PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns one locally-owned (point) row, merged from the A and B blocks in
   increasing global column order */
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt      max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    /* scratch buffers sized for the longest block row, reused by every MatGetRow call */
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies before the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed in the value pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRow_MPISBAIJ; only clears the active flag (buffers are reused) */
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Allows subsequent MatGetRow calls to return only the stored upper triangle */
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex conjugate of all entries; a no-op for real scalars */
PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow - distributed(parallel),
          iscol_local - locally owned (seq)
*/
PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) {
    /* a larger set cannot be a subset */
    *flg = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  /* work on sorted copies so the scan below can advance monotonically */
  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symmetric submatrix extraction: requires iscol == isrow (sorted); delegates to the BAIJ implementation */
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local;
  PetscInt  csize;
  PetscBool isequal;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    /* the gathered column IS was stashed on the matrix during the initial call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscBool issorted;

    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Aggregates MatInfo over the A and B blocks; reduces across ranks for GLOBAL flags */
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if
(flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Option dispatch; most options are forwarded to the local A and B blocks.
   MAT_HERMITIAN/MAT_SYMMETRIC swap the mat-vec kernels in complex builds. */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric             = PETSC_BOOL3_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
    /* SBAIJ is symmetric by construction; turning symmetry off is unsupported */
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_SPD_ETERNAL:
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this also sets ignore_ltriangular, same as the case above — confirm intended */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose of a symmetric matrix is itself: duplicate (or copy for reuse) */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse ==
MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symmetric diagonal scaling ll*A*rr; requires ll == rr to preserve symmetry */
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat           a = baij->A, b = baij->B;
  PetscInt      nv, m, n;
  PetscBool     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    PetscCall(VecEqual(ll, rr, &flg));
    PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
  }
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  /* start communicating the ghost values of rr; overlap with the local scaling below */
  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

/* Collective equality test: local A and B blocks compared, then AND-reduced */
PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
  Mat           a, b, c, d;
  PetscBool     flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
  PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatGetRowUpperTriangular(A));
    PetscCall(MatCopy_Basic(A, B, str));
    PetscCall(MatRestoreRowUpperTriangular(A));
  } else {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
    Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;

    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*X; fast BLAS path when patterns match, otherwise rebuilds Y with merged preallocation */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
  PetscBLASInt  bnz, one = 1;
  Mat_SeqSBAIJ *xa, *ya;
  Mat_SeqBAIJ  *xb, *yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    /* identical sparsity: axpy directly on the value arrays of A and B blocks */
    PetscScalar alpha = a;
    xa                = (Mat_SeqSBAIJ *)xx->A->data;
    ya                = (Mat_SeqSBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y, a, X, str));
    PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
  } else {
    /* unrelated patterns: build a fresh matrix with the union preallocation, then merge */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates to the BAIJ submatrix extraction; zeroes ops for non-symmetric index sets */
PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
{
  PetscInt  i;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
  for (i = 0; i < n; i++) {
    PetscCall(ISEqual(irow[i], icol[i], &flg));
    /* a submatrix with irow != icol is not symmetric: disable SBAIJ-specific ops */
    if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I; ensures the diagonal is preallocated before delegating to MatShift_Basic */
PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
  Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* preserve the nonew setting across the re-preallocation of the diagonal block */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    /* convert the local block-row index into a global one */
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPISBAIJ; slot numbers match struct _MatOps in matimpl.h */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                                       /* 4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                                       /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                                       /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                                       /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                                       /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                                       /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                                       /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                                       /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                                       /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                                       /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       NULL};

/* Preallocation for MPISBAIJ: validates block size and local/global shapes,
   then sets up the block-row bookkeeping (continues beyond this view) */
PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
  PetscInt      i, mbs, Mbs;
  PetscMPIInt   size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based insertion mode: restore the cached ops table */
    PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
  PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);

  mbs = B->rmap->n / bs;
  Mbs = B->rmap->N / bs;
  /* NOTE(review): message says "local rows" but prints the global count B->rmap->N — confirm intended */
  PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);

  B->rmap->bs = bs;
  b->bs2      = bs * bs;
  b->mbs      = mbs;
  b->Mbs      = Mbs;
  b->nbs      = B->cmap->n / bs;
  b->Nbs      = B->cmap->N / bs;

  /* block-row ownership ranges derived from the point-row layout */
  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1944 b->rstartbs = B->rmap->rstart / bs; 1945 b->rendbs = B->rmap->rend / bs; 1946 1947 b->cstartbs = B->cmap->rstart / bs; 1948 b->cendbs = B->cmap->rend / bs; 1949 1950 #if defined(PETSC_USE_CTABLE) 1951 PetscCall(PetscHMapIDestroy(&b->colmap)); 1952 #else 1953 PetscCall(PetscFree(b->colmap)); 1954 #endif 1955 PetscCall(PetscFree(b->garray)); 1956 PetscCall(VecDestroy(&b->lvec)); 1957 PetscCall(VecScatterDestroy(&b->Mvctx)); 1958 PetscCall(VecDestroy(&b->slvec0)); 1959 PetscCall(VecDestroy(&b->slvec0b)); 1960 PetscCall(VecDestroy(&b->slvec1)); 1961 PetscCall(VecDestroy(&b->slvec1a)); 1962 PetscCall(VecDestroy(&b->slvec1b)); 1963 PetscCall(VecScatterDestroy(&b->sMvctx)); 1964 1965 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1966 PetscCall(MatDestroy(&b->B)); 1967 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1968 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1969 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1970 1971 PetscCall(MatDestroy(&b->A)); 1972 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1973 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1974 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1975 1976 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1977 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1978 1979 B->preallocated = PETSC_TRUE; 1980 B->was_assembled = PETSC_FALSE; 1981 B->assembled = PETSC_FALSE; 1982 PetscFunctionReturn(PETSC_SUCCESS); 1983 } 1984 1985 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1986 { 1987 PetscInt m, rstart, cend; 1988 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1989 const PetscInt *JJ = NULL; 1990 PetscScalar *values = NULL; 1991 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1992 PetscBool nooffprocentries; 1993 1994 PetscFunctionBegin; 1995 
  PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  m      = B->rmap->n / bs;      /* local number of block rows */
  rstart = B->rmap->rstart / bs; /* first global block row owned by this process */
  cend   = B->cmap->rend / bs;   /* one past the last owned global block column */

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ = jj + ii[i];
    bd = 0; /* entries strictly below the block diagonal; not stored by SBAIJ */
    for (j = 0; j < nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d = 0; /* entries falling in this process's diagonal block-column range */
    for (; j < nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;          /* number of stored (upper-triangular) blocks in this row */
    nz_max   = PetscMax(nz_max, nz);
  }
  PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  /* with no user values, insert explicit zeros so the nonzero structure is still created */
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all inserted entries are locally owned: temporarily skip the off-process assembly phase */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
   based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
   the matrix is stored.

   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
   can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);

   Options Database Key:
. -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`

   Level: beginner

   Note:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.

.seealso: [](chapter_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
M*/

/* Constructor registered for MATMPISBAIJ: installs the ops table, initializes the
   Mat_MPISBAIJ data structure, composes the type-specific methods, and marks the
   matrix as (eternally) symmetric. */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ *b;
  PetscBool     flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));

  /* SBAIJ storage is symmetric by construction and stays so for the life of the matrix */
  B->symmetric                   = PETSC_BOOL3_TRUE;
  B->structurally_symmetric      = PETSC_BOOL3_TRUE;
  B->symmetry_eternal            = PETSC_TRUE;
  B->structural_symmetry_eternal = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian = PETSC_BOOL3_FALSE;
#else
  B->hermitian = PETSC_BOOL3_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
  /* NOTE(review): -mat_use_hash_table is queried twice, first as a bool then as a real
     (the factor); this mirrors the MPIBAIJ implementation -- verify intended */
  PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.

   This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
   and `MATMPISBAIJ` otherwise.

   Options Database Key:
. -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](chapter_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
M*/

/*@C
   MatMPISBAIJSetPreallocation - For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the in diagonal portion of the local
           (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix that is right of the diagonal
           (possibly different for each block row) or `NULL`.

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use

   Level: intermediate

   Notes:

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: [](chapter_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
@*/
/* Public preallocation entry point: validates arguments, then dispatches to the
   type-specific implementation composed as "MatMPISBAIJSetPreallocation_C" (a no-op
   for types that do not provide it). */
PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the in diagonal portion of the local
           (possibly different for each block row) or `NULL`.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or `NULL`.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Level: intermediate

   Notes:
   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: [](chapter_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
@*/
PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* choose the sequential format on one process, the parallel format otherwise */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPISBAIJ matrix: copies sizes, layouts, ownership tables, the local
   diagonal/off-diagonal matrices (values per cpvalues), and the scatter contexts. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat           mat;
  Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
  PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
  PetscScalar  *array;

  PetscFunctionBegin;
  *newmat = NULL;

  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat,
matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-size bookkeeping from the original */
  a      = (Mat_MPISBAIJ *)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow() workspace is not shared */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));

  /* slvec{0,1}{a,b} are views into the arrays of slvec0/slvec1:
     the first bs*mbs entries (local part) and the remaining entries (ghost part) */
  PetscCall(VecGetLocalSize(a->slvec1, &nt));
  PetscCall(VecGetArray(a->slvec1, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1, &array));
  PetscCall(VecGetArray(a->slvec0, &array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0, &array));

  /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary

/* Load an MPISBAIJ matrix from a viewer; only binary viewers are supported */
PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* v[i] = max_j |A[i,j]|.  Because only the upper triangle is stored, each process also
   accumulates the contributions its stored off-diagonal blocks make to OTHER processes'
   rows and sends them along (idx output is not supported). */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPISBAIJ *a =
(Mat_MPISBAIJ *)A->data;
  Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
  PetscReal     atmp;
  PetscReal    *work, *svalues, *rvalues;
  PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
  PetscMPIInt   rank, size;
  PetscInt     *rowners_bs, dest, count, source;
  PetscScalar  *va;
  MatScalar    *ba;
  MPI_Status    stat;

  PetscFunctionBegin;
  PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
  /* diagonal block contribution computed locally */
  PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
  PetscCall(VecGetArray(v, &va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs * Mbs, &work));

  /* row_max for B: walk the stored off-diagonal blocks; each entry updates both the
     local row max (va) and the column owner's row max (accumulated in work) */
  if (rank != size - 1) {
    for (i = 0; i < mbs; i++) {
      ncols = bi[1] - bi[0];
      bi++;
      brow = bs * i;
      for (j = 0; j < ncols; j++) {
        bcol = bs * (*bj);
        for (kcol = 0; kcol < bs; kcol++) {
          col = bcol + kcol;           /* local col index */
          col += rowners_bs[rank + 1]; /* global col index */
          for (krow = 0; krow < bs; krow++) {
            atmp = PetscAbsScalar(*ba);
            ba++;
            row = brow + krow; /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    /* NOTE(review): count is PetscInt but MPI_Send takes an int count -- with 64-bit
       PetscInt this narrows implicitly; verify against PETSc's MPI int handling */
    for (dest = rank + 1; dest < size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest + 1] - rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values: one message from every lower-ranked process, merged by max */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank + 1] - rowners_bs[rank];
    for (source = 0; source < rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
      /* process values */
      for (i = 0; i < count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v, &va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* SOR/SSOR relaxation for MPISBAIJ; only block size 1 is implemented.  The symmetric
   storage requires reconstructing the full row action via the slvec0/slvec1 work
   vectors and the sMvctx scatter before each local sweep. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
  PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
  PetscScalar       *x, *ptr, *from;
  Vec                bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
  PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
      its--;
    }

    PetscCall(VecDuplicate(bb, &bb1));
    while (its--) {
      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0, &ptr));
      PetscCall(VecGetArray(xx, &x));
      PetscCall(PetscArraycpy(ptr, x, bs * mbs));
      PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2628 PetscCall(VecScale(mat->slvec0, -1.0)); 2629 2630 /* copy bb into slvec1a */ 2631 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2632 PetscCall(VecGetArrayRead(bb, &b)); 2633 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2634 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2635 2636 /* set slvec1b = 0 */ 2637 PetscCall(VecSet(mat->slvec1b, 0.0)); 2638 2639 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2640 PetscCall(VecRestoreArray(xx, &x)); 2641 PetscCall(VecRestoreArrayRead(bb, &b)); 2642 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2643 2644 /* upper triangular part: bb1 = bb1 - B*x */ 2645 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2646 2647 /* local diagonal sweep */ 2648 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2649 } 2650 PetscCall(VecDestroy(&bb1)); 2651 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2652 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2653 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2654 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2655 } else if (flag & SOR_EISENSTAT) { 2656 Vec xx1; 2657 PetscBool hasop; 2658 const PetscScalar *diag; 2659 PetscScalar *sl, scale = (omega - 2.0) / omega; 2660 PetscInt i, n; 2661 2662 if (!mat->xx1) { 2663 PetscCall(VecDuplicate(bb, &mat->xx1)); 2664 PetscCall(VecDuplicate(bb, &mat->bb1)); 2665 } 2666 xx1 = mat->xx1; 2667 bb1 = mat->bb1; 2668 2669 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2670 2671 if (!mat->diag) { 2672 /* this is wrong for same matrix with new nonzero values */ 2673 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2674 
PetscCall(MatGetDiagonal(matin, mat->diag)); 2675 } 2676 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2677 2678 if (hasop) { 2679 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2680 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2681 } else { 2682 /* 2683 These two lines are replaced by code that may be a bit faster for a good compiler 2684 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2685 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2686 */ 2687 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2688 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2689 PetscCall(VecGetArrayRead(bb, &b)); 2690 PetscCall(VecGetArray(xx, &x)); 2691 PetscCall(VecGetLocalSize(xx, &n)); 2692 if (omega == 1.0) { 2693 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2694 PetscCall(PetscLogFlops(2.0 * n)); 2695 } else { 2696 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2697 PetscCall(PetscLogFlops(3.0 * n)); 2698 } 2699 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2700 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2701 PetscCall(VecRestoreArrayRead(bb, &b)); 2702 PetscCall(VecRestoreArray(xx, &x)); 2703 } 2704 2705 /* multiply off-diagonal portion of matrix */ 2706 PetscCall(VecSet(mat->slvec1b, 0.0)); 2707 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2708 PetscCall(VecGetArray(mat->slvec0, &from)); 2709 PetscCall(VecGetArray(xx, &x)); 2710 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2711 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2712 PetscCall(VecRestoreArray(xx, &x)); 2713 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2714 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2715 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2716 2717 /* local sweep */ 2718 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | 
SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2719 PetscCall(VecAXPY(xx, 1.0, xx1)); 2720 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 /*@ 2725 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2726 CSR format the local rows. 2727 2728 Collective 2729 2730 Input Parameters: 2731 + comm - MPI communicator 2732 . bs - the block size, only a block size of 1 is supported 2733 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2734 . n - This value should be the same as the local size used in creating the 2735 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2736 calculated if `N` is given) For square matrices `n` is almost always `m`. 2737 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2738 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2739 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2740 . j - column indices 2741 - a - matrix values 2742 2743 Output Parameter: 2744 . mat - the matrix 2745 2746 Level: intermediate 2747 2748 Notes: 2749 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2750 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2751 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2752 2753 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 

.seealso: [](chapter_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* validate caller-supplied CSR metadata before any collective creation call */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  /* hands the CSR arrays to the type-specific preallocation routine, which copies them */
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B - the matrix
. bs - the block size
. i - the indices into `j` for the start of each local row (starts with zero)
.
j - the column indices for each local row (starts with zero) these must be sorted for each row 2780 - v - optional values in the matrix 2781 2782 Level: advanced 2783 2784 Notes: 2785 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2786 and usually the numerical values as well 2787 2788 Any entries below the diagonal are ignored 2789 2790 .seealso: [](chapter_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ` 2791 @*/ 2792 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2793 { 2794 PetscFunctionBegin; 2795 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2796 PetscFunctionReturn(PETSC_SUCCESS); 2797 } 2798 2799 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2800 { 2801 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2802 PetscInt *indx; 2803 PetscScalar *values; 2804 2805 PetscFunctionBegin; 2806 PetscCall(MatGetSize(inmat, &m, &N)); 2807 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2808 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2809 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2810 PetscInt *bindx, rmax = a->rmax, j; 2811 PetscMPIInt rank, size; 2812 2813 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2814 mbs = m / bs; 2815 Nbs = N / cbs; 2816 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2817 nbs = n / cbs; 2818 2819 PetscCall(PetscMalloc1(rmax, &bindx)); 2820 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2821 2822 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2823 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2824 if (rank == size - 1) { 2825 /* Check sum(nbs) 
= Nbs */ 2826 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2827 } 2828 2829 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2830 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2831 for (i = 0; i < mbs; i++) { 2832 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2833 nnz = nnz / bs; 2834 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2835 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2836 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2837 } 2838 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2839 PetscCall(PetscFree(bindx)); 2840 2841 PetscCall(MatCreate(comm, outmat)); 2842 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2843 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2844 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2845 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2846 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2847 MatPreallocateEnd(dnz, onz); 2848 } 2849 2850 /* numeric phase */ 2851 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2852 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2853 2854 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2855 for (i = 0; i < m; i++) { 2856 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2857 Ii = i + rstart; 2858 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2859 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2860 } 2861 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2862 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2863 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 
} 2866