#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat      preallocator;
  PetscInt r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}
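
/*
   Note on the conversion below: MatConvert_MPISBAIJ_Basic() explicitly symmetrizes the stored
   upper triangle. Unless the target type is dense, it preallocates the result with
   MatPreallocateWithMats_Private() using A's pattern plus its transpose (symm = PETSC_TRUE),
   then copies every stored entry (r,c) to both (r,c) and (c,r); with complex scalars and a
   Hermitian matrix the mirrored entry is conjugated.
*/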
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp = aj + ai[brow]; \
    ap = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp = bj + bi[brow]; \
    ap = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar value;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macro */
  Mat A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar *aa = a->a;

  Mat B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar *ba = b->a;

  PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col = in[j] - cstart_orig;                     /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
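
/*
   Usage sketch (illustrative only, not part of this file): a caller assembling an MPISBAIJ matrix
   supplies only upper-triangular entries, or asks for lower-triangular input to be ignored.

     Mat         mat;            // assumed already created with MatSetType(mat, MATMPISBAIJ) and preallocated
     PetscInt    i = 3, j = 5;   // hypothetical global indices with i <= j
     PetscScalar val = 1.0;
     PetscCall(MatSetOption(mat, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); // optional: silently drop i > j input
     PetscCall(MatSetValues(mat, 1, &i, 1, &j, &val, INSERT_VALUES));
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/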
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
    PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
  }
  rp = aj + ai[row];
  ap = aa + bs2 * ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low = 0;
  high = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp = aj + ai[row];
  ap = aa + bs2 * ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low = 0;
  high = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar *barray = baij->barray;
  PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        /* If NumCol = 1 then a copy is not required */
        if (roworiented && n == 1) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
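
/* Retrieves individual entries; only locally owned rows are supported. Columns owned by other
   processes are located through the colmap/garray tables of the off-diagonal block B, and entries
   not stored there are returned as 0.0. */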
static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* lnorm2[0] now holds the square of the Frobenius norm of A */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* lnorm2[1] now holds the square of the Frobenius norm of B */
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal *rsum, vabs;
      PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar *v;

      PetscCall(PetscCalloc1(mat->cmap->N, &rsum));
      /* Amat */
      v = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum[col] > *norm) *norm = rsum[col];
      }
      PetscCall(PetscFree(rsum));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt *row, *col;
  PetscBool all_assembled;
  PetscMPIInt n;
  PetscBool r1, r2, r3;
  MatScalar *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs;
  PetscMPIInt rank = baij->rank;
  PetscBool isascii, isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar *a;
    const char *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai = Aloc->i;
    aj = Aloc->j;
    a = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai = Bloc->i;
    aj = Bloc->j;
    a = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
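
/*
   The MatMult routines below exploit symmetry: only the diagonal block A and the (upper) off-diagonal
   block B are stored. The work vector slvec0 holds [x_local; B^T x_local] (B^H x_local in the
   Hermitian case) and slvec1 holds [A x_local; gathered off-process part]; the scatter sMvctx
   combines the two with ADD_VALUES, and the product is finished as y = B * slvec1b + slvec1a.
   As noted in the comments below, slvec0b and slvec1a/slvec1b are sub-vectors that share memory
   with slvec0 and slvec1.
*/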
#if defined(PETSC_USE_COMPLEX)
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  *flg = PETSC_FALSE;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
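
/*
   Extracts a submatrix. When isrow is a subset of iscol (or iscol spans all columns) the request
   can be forwarded directly to MatCreateSubMatrix_MPIBAIJ_Private(). Otherwise the result is built
   as mat(isrow,iscol) + mat(iscol,isrow)^T, which requires isrow and iscol to have an empty
   intersection, as checked below.
*/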
static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  Mat C[2];
  IS iscol_local, isrow_local;
  PetscInt csize, csize_local, rsize;
  PetscBool isequal, issorted, isidentity = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(ISGetLocalSize(isrow, &rsize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
  }
  PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
  if (!isequal) {
    PetscCall(ISGetLocalSize(iscol_local, &csize_local));
    isidentity = (PetscBool)(mat->cmap->N == csize_local);
    if (!isidentity) {
      if (call == MAT_REUSE_MATRIX) {
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
        PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
      } else {
        PetscCall(ISAllGather(isrow, &isrow_local));
        PetscCall(ISSorted(isrow_local, &issorted));
        PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
      }
    }
  }
  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
  if (!isequal && !isidentity) {
    if (call == MAT_INITIAL_MATRIX) {
      IS intersect;
      PetscInt ni;

      PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
      PetscCall(ISGetLocalSize(intersect, &ni));
      PetscCall(ISDestroy(&intersect));
      PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
    }
    PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
    PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
    PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
    if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
    else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    PetscCall(MatDestroy(C));
    PetscCall(MatDestroy(C + 1));
  }
  if (call == MAT_INITIAL_MATRIX) {
    if (!isequal && !isidentity) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
      PetscCall(ISDestroy(&isrow_local));
    }
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscObjectComm((PetscObject)matin))); 1485 1486 info->nz_used = irecv[0]; 1487 info->nz_allocated = irecv[1]; 1488 info->nz_unneeded = irecv[2]; 1489 info->memory = irecv[3]; 1490 info->mallocs = irecv[4]; 1491 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1492 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1493 info->fill_ratio_needed = 0; 1494 info->factor_mallocs = 0; 1495 PetscFunctionReturn(PETSC_SUCCESS); 1496 } 1497 1498 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1499 { 1500 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1501 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1502 1503 PetscFunctionBegin; 1504 switch (op) { 1505 case MAT_NEW_NONZERO_LOCATIONS: 1506 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1507 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1508 case MAT_KEEP_NONZERO_PATTERN: 1509 case MAT_NEW_NONZERO_LOCATION_ERR: 1510 MatCheckPreallocated(A, 1); 1511 PetscCall(MatSetOption(a->A, op, flg)); 1512 PetscCall(MatSetOption(a->B, op, flg)); 1513 break; 1514 case MAT_ROW_ORIENTED: 1515 MatCheckPreallocated(A, 1); 1516 a->roworiented = flg; 1517 1518 PetscCall(MatSetOption(a->A, op, flg)); 1519 PetscCall(MatSetOption(a->B, op, flg)); 1520 break; 1521 case MAT_IGNORE_OFF_PROC_ENTRIES: 1522 a->donotstash = flg; 1523 break; 1524 case MAT_USE_HASH_TABLE: 1525 a->ht_flag = flg; 1526 break; 1527 case MAT_HERMITIAN: 1528 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1529 #if defined(PETSC_USE_COMPLEX) 1530 if (flg) { /* need different mat-vec ops */ 1531 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1532 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1533 A->ops->multtranspose = NULL; 1534 A->ops->multtransposeadd = NULL; 1535 A->symmetric = PETSC_BOOL3_FALSE; 1536 } 1537 #endif 1538 break; 1539 case MAT_SPD: 1540 case MAT_SYMMETRIC: 1541 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1542 #if defined(PETSC_USE_COMPLEX) 1543 if (flg) { /* restore to use default mat-vec ops */ 1544 A->ops->mult = MatMult_MPISBAIJ; 1545 A->ops->multadd = MatMultAdd_MPISBAIJ; 1546 A->ops->multtranspose = MatMult_MPISBAIJ; 1547 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1548 } 1549 #endif 1550 break; 1551 case MAT_STRUCTURALLY_SYMMETRIC: 1552 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1553 break; 1554 case MAT_IGNORE_LOWER_TRIANGULAR: 1555 case MAT_ERROR_LOWER_TRIANGULAR: 1556 aA->ignore_ltriangular = flg; 1557 break; 1558 case MAT_GETROW_UPPERTRIANGULAR: 1559 aA->getrow_utriangular = flg; 1560 break; 1561 default: 1562 break; 1563 } 1564 PetscFunctionReturn(PETSC_SUCCESS); 1565 } 1566 1567 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1568 { 1569 PetscFunctionBegin; 1570 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1571 if (reuse == MAT_INITIAL_MATRIX) { 1572 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1573 } else if (reuse == MAT_REUSE_MATRIX) { 1574 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1575 } 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1580 { 1581 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1582 Mat a = baij->A, b = baij->B; 1583 PetscInt nv, m, n; 1584 PetscBool flg; 1585 1586 PetscFunctionBegin; 1587 if (ll != rr) { 1588 PetscCall(VecEqual(ll, rr, &flg)); 1589 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric 
format, left and right scaling vectors must be same"); 1590 } 1591 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1592 1593 PetscCall(MatGetLocalSize(mat, &m, &n)); 1594 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1595 1596 PetscCall(VecGetLocalSize(rr, &nv)); 1597 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1598 1599 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1600 1601 /* left diagonalscale the off-diagonal part */ 1602 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1603 1604 /* scale the diagonal part */ 1605 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1606 1607 /* right diagonalscale the off-diagonal part */ 1608 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1609 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1614 { 1615 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1616 1617 PetscFunctionBegin; 1618 PetscCall(MatSetUnfactored(a->A)); 1619 PetscFunctionReturn(PETSC_SUCCESS); 1620 } 1621 1622 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1623 1624 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1625 { 1626 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1627 Mat a, b, c, d; 1628 PetscBool flg; 1629 1630 PetscFunctionBegin; 1631 a = matA->A; 1632 b = matA->B; 1633 c = matB->A; 1634 d = matB->B; 1635 1636 PetscCall(MatEqual(a, c, &flg)); 1637 if (flg) PetscCall(MatEqual(b, d, &flg)); 1638 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1639 PetscFunctionReturn(PETSC_SUCCESS); 1640 } 1641 1642 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1643 { 1644 PetscBool isbaij; 1645 1646 PetscFunctionBegin; 1647 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1648 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1649 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1650 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1651 PetscCall(MatGetRowUpperTriangular(A)); 1652 PetscCall(MatCopy_Basic(A, B, str)); 1653 PetscCall(MatRestoreRowUpperTriangular(A)); 1654 } else { 1655 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1656 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1657 1658 PetscCall(MatCopy(a->A, b->A, str)); 1659 PetscCall(MatCopy(a->B, b->B, str)); 1660 } 1661 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1666 { 1667 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1668 PetscBLASInt bnz, one = 1; 1669 Mat_SeqSBAIJ *xa, *ya; 1670 Mat_SeqBAIJ *xb, *yb; 1671 1672 PetscFunctionBegin; 1673 if (str == SAME_NONZERO_PATTERN) { 1674 PetscScalar alpha = a; 1675 xa = (Mat_SeqSBAIJ *)xx->A->data; 1676 ya = (Mat_SeqSBAIJ *)yy->A->data; 1677 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1678 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1679 xb = (Mat_SeqBAIJ *)xx->B->data; 1680 yb = (Mat_SeqBAIJ *)yy->B->data; 1681 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1682 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1683 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1684 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1685 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1686 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1687 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1688 } else { 1689 Mat B; 1690 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1691 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1692 PetscCall(MatGetRowUpperTriangular(X)); 1693 PetscCall(MatGetRowUpperTriangular(Y)); 1694 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1695 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1696 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1697 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1698 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1699 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1700 PetscCall(MatSetType(B, MATMPISBAIJ)); 1701 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1702 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1703 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1704 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1705 PetscCall(MatHeaderMerge(Y, &B)); 1706 PetscCall(PetscFree(nnz_d)); 1707 PetscCall(PetscFree(nnz_o)); 1708 PetscCall(MatRestoreRowUpperTriangular(X)); 1709 PetscCall(MatRestoreRowUpperTriangular(Y)); 1710 } 1711 PetscFunctionReturn(PETSC_SUCCESS); 1712 } 1713 1714 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1715 { 1716 PetscInt i; 1717 PetscBool flg; 1718 1719 PetscFunctionBegin; 1720 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1721 for (i = 0; i < n; i++) { 1722 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1723 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1724 } 1725 PetscFunctionReturn(PETSC_SUCCESS); 1726 } 1727 1728 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1729 { 1730 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 
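/* the shift a*I only touches diagonal entries, which live in the local diagonal MATSEQSBAIJ block maij->A; the branches below make sure that block has storage for its diagonal blocks before MatShift_Basic() is called */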
1731 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1732 1733 PetscFunctionBegin; 1734 if (!Y->preallocated) { 1735 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1736 } else if (!aij->nz) { 1737 PetscInt nonew = aij->nonew; 1738 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1739 aij->nonew = nonew; 1740 } 1741 PetscCall(MatShift_Basic(Y, a)); 1742 PetscFunctionReturn(PETSC_SUCCESS); 1743 } 1744 1745 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1746 { 1747 PetscFunctionBegin; 1748 *a = ((Mat_MPISBAIJ *)A->data)->A; 1749 PetscFunctionReturn(PETSC_SUCCESS); 1750 } 1751 1752 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1753 { 1754 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1755 1756 PetscFunctionBegin; 1757 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1758 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1759 PetscFunctionReturn(PETSC_SUCCESS); 1760 } 1761 1762 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1763 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1764 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1765 1766 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1767 MatGetRow_MPISBAIJ, 1768 MatRestoreRow_MPISBAIJ, 1769 MatMult_MPISBAIJ, 1770 /* 4*/ MatMultAdd_MPISBAIJ, 1771 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1772 MatMultAdd_MPISBAIJ, 1773 NULL, 1774 NULL, 1775 NULL, 1776 /* 10*/ NULL, 1777 NULL, 1778 NULL, 1779 MatSOR_MPISBAIJ, 1780 MatTranspose_MPISBAIJ, 1781 /* 15*/ MatGetInfo_MPISBAIJ, 1782 MatEqual_MPISBAIJ, 1783 MatGetDiagonal_MPISBAIJ, 1784 MatDiagonalScale_MPISBAIJ, 1785 MatNorm_MPISBAIJ, 1786 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1787 MatAssemblyEnd_MPISBAIJ, 1788 MatSetOption_MPISBAIJ, 1789 MatZeroEntries_MPISBAIJ, 1790 /* 24*/ NULL, 1791 NULL, 1792 NULL, 1793 NULL, 1794 NULL, 1795 /* 29*/ MatSetUp_MPI_Hash, 1796 NULL, 1797 NULL, 1798 MatGetDiagonalBlock_MPISBAIJ, 1799 NULL, 1800 /* 34*/ MatDuplicate_MPISBAIJ, 1801 NULL, 1802 NULL, 1803 NULL, 1804 NULL, 1805 /* 39*/ MatAXPY_MPISBAIJ, 1806 MatCreateSubMatrices_MPISBAIJ, 1807 MatIncreaseOverlap_MPISBAIJ, 1808 MatGetValues_MPISBAIJ, 1809 MatCopy_MPISBAIJ, 1810 /* 44*/ NULL, 1811 MatScale_MPISBAIJ, 1812 MatShift_MPISBAIJ, 1813 NULL, 1814 NULL, 1815 /* 49*/ NULL, 1816 NULL, 1817 NULL, 1818 NULL, 1819 NULL, 1820 /* 54*/ NULL, 1821 NULL, 1822 MatSetUnfactored_MPISBAIJ, 1823 NULL, 1824 MatSetValuesBlocked_MPISBAIJ, 1825 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1826 NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 /* 64*/ NULL, 1831 NULL, 1832 NULL, 1833 NULL, 1834 MatGetRowMaxAbs_MPISBAIJ, 1835 /* 69*/ NULL, 1836 MatConvert_MPISBAIJ_Basic, 1837 NULL, 1838 NULL, 1839 NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 MatLoad_MPISBAIJ, 1845 /* 79*/ NULL, 1846 NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 /* 84*/ NULL, 1851 NULL, 1852 NULL, 1853 NULL, 1854 NULL, 1855 /* 89*/ NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 MatConjugate_MPISBAIJ, 1860 /* 94*/ NULL, 1861 NULL, 1862 MatRealPart_MPISBAIJ, 1863 MatImaginaryPart_MPISBAIJ, 1864 MatGetRowUpperTriangular_MPISBAIJ, 1865 /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ, 1866 NULL, 1867 NULL, 1868 NULL, 1869 NULL, 1870 /*104*/ NULL, 1871 NULL, 1872 NULL, 1873 NULL, 1874 NULL, 1875 /*109*/ NULL, 1876 NULL, 1877 NULL, 1878 NULL, 1879 NULL, 1880 /*114*/ NULL, 1881 
NULL, 1882 NULL, 1883 NULL, 1884 NULL, 1885 /*119*/ NULL, 1886 NULL, 1887 NULL, 1888 NULL, 1889 NULL, 1890 /*124*/ NULL, 1891 NULL, 1892 MatSetBlockSizes_Default, 1893 NULL, 1894 NULL, 1895 /*129*/ NULL, 1896 MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1897 NULL, 1898 NULL, 1899 NULL, 1900 /*134*/ NULL, 1901 NULL, 1902 MatEliminateZeros_MPISBAIJ, 1903 NULL, 1904 NULL, 1905 /*139*/ NULL, 1906 NULL, 1907 MatCopyHashToXAIJ_MPI_Hash, 1908 NULL, 1909 NULL}; 1910 1911 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1912 { 1913 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1914 PetscInt i, mbs, Mbs; 1915 PetscMPIInt size; 1916 1917 PetscFunctionBegin; 1918 if (B->hash_active) { 1919 B->ops[0] = b->cops; 1920 B->hash_active = PETSC_FALSE; 1921 } 1922 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1923 PetscCall(MatSetBlockSize(B, bs)); 1924 PetscCall(PetscLayoutSetUp(B->rmap)); 1925 PetscCall(PetscLayoutSetUp(B->cmap)); 1926 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1927 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1928 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1929 1930 mbs = B->rmap->n / bs; 1931 Mbs = B->rmap->N / bs; 1932 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1933 1934 B->rmap->bs = bs; 1935 b->bs2 = bs * bs; 1936 b->mbs = mbs; 1937 b->Mbs = Mbs; 1938 b->nbs = B->cmap->n / bs; 1939 b->Nbs = B->cmap->N / bs; 1940 1941 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1942 b->rstartbs = B->rmap->rstart / bs; 1943 b->rendbs = B->rmap->rend / bs; 1944 1945 b->cstartbs = B->cmap->rstart / bs; 1946 b->cendbs = B->cmap->rend / bs; 1947 1948 #if defined(PETSC_USE_CTABLE) 1949 PetscCall(PetscHMapIDestroy(&b->colmap)); 1950 #else 1951 PetscCall(PetscFree(b->colmap)); 1952 #endif 1953 PetscCall(PetscFree(b->garray)); 1954 PetscCall(VecDestroy(&b->lvec)); 1955 PetscCall(VecScatterDestroy(&b->Mvctx)); 1956 PetscCall(VecDestroy(&b->slvec0)); 1957 PetscCall(VecDestroy(&b->slvec0b)); 1958 PetscCall(VecDestroy(&b->slvec1)); 1959 PetscCall(VecDestroy(&b->slvec1a)); 1960 PetscCall(VecDestroy(&b->slvec1b)); 1961 PetscCall(VecScatterDestroy(&b->sMvctx)); 1962 1963 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1964 1965 MatSeqXAIJGetOptions_Private(b->B); 1966 PetscCall(MatDestroy(&b->B)); 1967 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1968 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 1969 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1970 MatSeqXAIJRestoreOptions_Private(b->B); 1971 1972 MatSeqXAIJGetOptions_Private(b->A); 1973 PetscCall(MatDestroy(&b->A)); 1974 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1975 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1976 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1977 MatSeqXAIJRestoreOptions_Private(b->A); 1978 1979 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1980 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1981 1982 B->preallocated = PETSC_TRUE; 1983 B->was_assembled = PETSC_FALSE; 1984 B->assembled = PETSC_FALSE; 1985 PetscFunctionReturn(PETSC_SUCCESS); 1986 } 1987 1988 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1989 { 1990 PetscInt m, rstart, cend; 1991 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1992 const PetscInt *JJ = NULL; 1993 PetscScalar *values = NULL; 1994 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1995 PetscBool nooffprocentries; 1996 1997 PetscFunctionBegin; 1998 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1999 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2000 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2001 PetscCall(PetscLayoutSetUp(B->rmap)); 2002 PetscCall(PetscLayoutSetUp(B->cmap)); 2003 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2004 m = B->rmap->n / bs; 2005 rstart = B->rmap->rstart / bs; 2006 cend = B->cmap->rend / bs; 2007 2008 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2009 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2010 for (i = 0; i < m; i++) { 2011 nz = ii[i + 1] - ii[i]; 2012 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2013 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2014 JJ = jj + ii[i]; 2015 bd = 0; 2016 for (j = 0; j < nz; j++) { 2017 if (*JJ >= i + rstart) break; 2018 JJ++; 2019 bd++; 2020 } 2021 d = 0; 2022 for (; j < nz; j++) { 2023 if (*JJ++ >= cend) break; 2024 d++; 2025 } 2026 d_nnz[i] = d; 2027 o_nnz[i] = nz - d - bd; 2028 nz = nz - bd; 2029 nz_max = PetscMax(nz_max, nz); 2030 } 2031 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2032 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2033 PetscCall(PetscFree2(d_nnz, o_nnz)); 2034 2035 values = (PetscScalar *)V; 2036 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2037 for (i = 0; i < m; i++) { 2038 PetscInt row = i + rstart; 2039 PetscInt ncols = ii[i + 1] - ii[i]; 2040 const PetscInt *icols = jj + ii[i]; 2041 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2042 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2043 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2044 } else { /* block ordering does not match so we can only insert one block at a time. */ 2045 PetscInt j; 2046 for (j = 0; j < ncols; j++) { 2047 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2048 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2049 } 2050 } 2051 } 2052 2053 if (!V) PetscCall(PetscFree(values)); 2054 nooffprocentries = B->nooffprocentries; 2055 B->nooffprocentries = PETSC_TRUE; 2056 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2057 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2058 B->nooffprocentries = nooffprocentries; 2059 2060 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 /*MC 2065 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2066 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2067 the matrix is stored. 2068 2069 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2070 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2071 2072 Options Database Key: 2073 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2074 2075 Level: beginner 2076 2077 Note: 2078 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2079 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2080 2081 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2082 M*/ 2083 2084 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2085 { 2086 Mat_MPISBAIJ *b; 2087 PetscBool flg = PETSC_FALSE; 2088 2089 PetscFunctionBegin; 2090 PetscCall(PetscNew(&b)); 2091 B->data = (void *)b; 2092 B->ops[0] = MatOps_Values; 2093 2094 B->ops->destroy = MatDestroy_MPISBAIJ; 2095 B->ops->view = MatView_MPISBAIJ; 2096 B->assembled = PETSC_FALSE; 2097 B->insertmode = NOT_SET_VALUES; 2098 2099 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2100 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2101 2102 /* build local table of row and column ownerships */ 2103 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2104 2105 /* build cache for off array entries formed */ 2106 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2107 2108 b->donotstash = PETSC_FALSE; 2109 b->colmap = NULL; 2110 b->garray = NULL; 2111 b->roworiented = PETSC_TRUE; 2112 2113 /* stuff used in block assembly */ 2114 b->barray = NULL; 2115 2116 /* stuff used for matrix vector multiply */ 2117 b->lvec = NULL; 2118 b->Mvctx = NULL; 2119 b->slvec0 = NULL; 2120 b->slvec0b = NULL; 2121 b->slvec1 = NULL; 2122 b->slvec1a = NULL; 2123 b->slvec1b = NULL; 2124 b->sMvctx = NULL; 2125 2126 /* stuff for MatGetRow() */ 2127 b->rowindices = NULL; 2128 b->rowvalues = NULL; 2129 b->getrowactive = PETSC_FALSE; 2130 2131 /* hash table stuff */ 2132 b->ht = NULL; 2133 b->hd = NULL; 2134 b->ht_size = 0; 2135 b->ht_flag = PETSC_FALSE; 2136 b->ht_fact = 0; 2137 b->ht_total_ct = 0; 2138 b->ht_insert_ct = 0; 2139 2140 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2141 b->ijonly = PETSC_FALSE; 2142 2143 b->in_loc = NULL; 2144 b->v_loc = NULL; 2145 b->n_loc = 0; 2146 2147 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2148 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2149 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2150 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2151 #if defined(PETSC_HAVE_ELEMENTAL) 2152 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2153 #endif 2154 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 2155 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2156 #endif 2157 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2158 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2159 2160 B->symmetric = PETSC_BOOL3_TRUE; 2161 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2162 B->symmetry_eternal = PETSC_TRUE; 2163 B->structural_symmetry_eternal = PETSC_TRUE; 2164 #if defined(PETSC_USE_COMPLEX) 2165 B->hermitian = PETSC_BOOL3_FALSE; 2166 #else 2167 B->hermitian = PETSC_BOOL3_TRUE; 2168 #endif 2169 2170 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2171 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2172 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2173 if (flg) { 2174 PetscReal fact = 1.39; 2175 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2176 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2177 if (fact <= 1.0) fact = 1.39; 2178 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2179 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2180 } 2181 PetscOptionsEnd(); 2182 PetscFunctionReturn(PETSC_SUCCESS); 2183 } 2184 2185 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2186 /*MC 2187 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2188 2189 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2190 and `MATMPISBAIJ` otherwise. 2191 2192 Options Database Key: 2193 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2194 2195 Level: beginner 2196 2197 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2198 M*/ 2199 2200 /*@ 2201 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2202 the user should preallocate the matrix storage by setting the parameters 2203 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2204 performance can be increased by more than a factor of 50. 2205 2206 Collective 2207 2208 Input Parameters: 2209 + B - the matrix 2210 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2211 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2212 . d_nz - number of block nonzeros per block row in diagonal portion of local 2213 submatrix (same for all local rows) 2214 . 
d_nnz - array containing the number of block nonzeros in the various block rows 2215 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2216 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2217 for the diagonal entry and set a value even if it is zero. 2218 . o_nz - number of block nonzeros per block row in the off-diagonal portion of the local 2219 submatrix (same for all local rows). 2220 - o_nnz - array containing the number of nonzeros in the various block rows of the 2221 off-diagonal portion of the local submatrix that is right of the diagonal 2222 (possibly different for each block row) or `NULL`. 2223 2224 Options Database Keys: 2225 + -mat_no_unroll - uses code that does not unroll the loops in the 2226 block calculations (much slower) 2227 - -mat_block_size - size of the blocks to use 2228 2229 Level: intermediate 2230 2231 Notes: 2232 2233 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2234 then it must be used on all processors that share the object for that argument. 2235 2236 If the *_nnz parameter is given then the *_nz parameter is ignored. 2237 2238 Storage Information: 2239 For a square global matrix we define each processor's diagonal portion 2240 to be its local rows and the corresponding columns (a square submatrix); 2241 each processor's off-diagonal portion encompasses the remainder of the 2242 local matrix (a rectangular submatrix). 2243 2244 The user can specify preallocated storage for the diagonal part of 2245 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2246 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2247 memory allocation. Likewise, specify preallocated storage for the 2248 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2249 2250 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2251 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 2252 You can also run with the option `-info` and look for messages with the string 2253 malloc in them to see if additional memory allocation was needed. 2254 2255 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2256 the figure below we depict these three local rows and all columns (0-11). 2257 2258 .vb 2259 0 1 2 3 4 5 6 7 8 9 10 11 2260 -------------------------- 2261 row 3 |. . . d d d o o o o o o 2262 row 4 |. . . d d d o o o o o o 2263 row 5 |. . . d d d o o o o o o 2264 -------------------------- 2265 .ve 2266 2267 Thus, any entries in the d locations are stored in the d (diagonal) 2268 submatrix, and any entries in the o locations are stored in the 2269 o (off-diagonal) submatrix. Note that the d matrix is stored in 2270 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2271 2272 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2273 plus the diagonal part of the d matrix, 2274 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2275 2276 In general, for PDE problems in which most nonzeros are near the diagonal, 2277 one expects `d_nz` >> `o_nz`. 
2278 2279 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2280 @*/ 2281 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2282 { 2283 PetscFunctionBegin; 2284 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2285 PetscValidType(B, 1); 2286 PetscValidLogicalCollectiveInt(B, bs, 2); 2287 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2288 PetscFunctionReturn(PETSC_SUCCESS); 2289 } 2290 2291 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2292 /*@ 2293 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2294 (block compressed row). For good matrix assembly performance 2295 the user should preallocate the matrix storage by setting the parameters 2296 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2297 2298 Collective 2299 2300 Input Parameters: 2301 + comm - MPI communicator 2302 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2303 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2304 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2305 This value should be the same as the local size used in creating the 2306 y vector for the matrix-vector product y = Ax. 2307 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2308 This value should be the same as the local size used in creating the 2309 x vector for the matrix-vector product y = Ax. 2310 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2311 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2312 . d_nz - number of block nonzeros per block row in diagonal portion of local 2313 submatrix (same for all local rows) 2314 . d_nnz - array containing the number of block nonzeros in the various block rows 2315 in the upper triangular portion of the in diagonal portion of the local 2316 (possibly different for each block block row) or `NULL`. 2317 If you plan to factor the matrix you must leave room for the diagonal entry and 2318 set its value even if it is zero. 2319 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2320 submatrix (same for all local rows). 2321 - o_nnz - array containing the number of nonzeros in the various block rows of the 2322 off-diagonal portion of the local submatrix (possibly different for 2323 each block row) or `NULL`. 2324 2325 Output Parameter: 2326 . A - the matrix 2327 2328 Options Database Keys: 2329 + -mat_no_unroll - uses code that does not unroll the loops in the 2330 block calculations (much slower) 2331 . -mat_block_size - size of the blocks to use 2332 - -mat_mpi - use the parallel matrix data structures even on one processor 2333 (defaults to using SeqBAIJ format on one processor) 2334 2335 Level: intermediate 2336 2337 Notes: 2338 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2339 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
2340 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2341 2342 The number of rows and columns must be divisible by blocksize. 2343 This matrix type does not support complex Hermitian operations. 2344 2345 The user MUST specify either the local or global matrix dimensions 2346 (possibly both). 2347 2348 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2349 then it must be used on all processors that share the object for that argument. 2350 2351 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 2352 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2353 2354 If the *_nnz parameter is given then the *_nz parameter is ignored. 2355 2356 Storage Information: 2357 For a square global matrix we define each processor's diagonal portion 2358 to be its local rows and the corresponding columns (a square submatrix); 2359 each processor's off-diagonal portion encompasses the remainder of the 2360 local matrix (a rectangular submatrix). 2361 2362 The user can specify preallocated storage for the diagonal part of 2363 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2364 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2365 memory allocation. Likewise, specify preallocated storage for the 2366 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2367 2368 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2369 the figure below we depict these three local rows and all columns (0-11). 2370 2371 .vb 2372 0 1 2 3 4 5 6 7 8 9 10 11 2373 -------------------------- 2374 row 3 |. . . d d d o o o o o o 2375 row 4 |. . . d d d o o o o o o 2376 row 5 |. . . d d d o o o o o o 2377 -------------------------- 2378 .ve 2379 2380 Thus, any entries in the d locations are stored in the d (diagonal) 2381 submatrix, and any entries in the o locations are stored in the 2382 o (off-diagonal) submatrix. Note that the d matrix is stored in 2383 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2384 2385 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2386 plus the diagonal part of the d matrix, 2387 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2388 In general, for PDE problems in which most nonzeros are near the diagonal, 2389 one expects `d_nz` >> `o_nz`. 
2390 2391 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2392 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2393 @*/ 2394 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2395 { 2396 PetscMPIInt size; 2397 2398 PetscFunctionBegin; 2399 PetscCall(MatCreate(comm, A)); 2400 PetscCall(MatSetSizes(*A, m, n, M, N)); 2401 PetscCallMPI(MPI_Comm_size(comm, &size)); 2402 if (size > 1) { 2403 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2404 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2405 } else { 2406 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2407 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2408 } 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2413 { 2414 Mat mat; 2415 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2416 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2417 PetscScalar *array; 2418 2419 PetscFunctionBegin; 2420 *newmat = NULL; 2421 2422 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2423 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2424 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2425 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2426 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2427 2428 if (matin->hash_active) { 2429 PetscCall(MatSetUp(mat)); 2430 } else { 2431 mat->factortype = matin->factortype; 2432 mat->preallocated = PETSC_TRUE; 2433 mat->assembled = PETSC_TRUE; 2434 mat->insertmode = NOT_SET_VALUES; 2435 2436 a = (Mat_MPISBAIJ *)mat->data; 2437 a->bs2 = oldmat->bs2; 2438 a->mbs = oldmat->mbs; 2439 a->nbs = oldmat->nbs; 2440 a->Mbs = oldmat->Mbs; 2441 a->Nbs = oldmat->Nbs; 2442 2443 a->size = oldmat->size; 2444 a->rank = oldmat->rank; 2445 a->donotstash = oldmat->donotstash; 2446 a->roworiented = oldmat->roworiented; 2447 a->rowindices = NULL; 2448 a->rowvalues = NULL; 2449 a->getrowactive = PETSC_FALSE; 2450 a->barray = NULL; 2451 a->rstartbs = oldmat->rstartbs; 2452 a->rendbs = oldmat->rendbs; 2453 a->cstartbs = oldmat->cstartbs; 2454 a->cendbs = oldmat->cendbs; 2455 2456 /* hash table stuff */ 2457 a->ht = NULL; 2458 a->hd = NULL; 2459 a->ht_size = 0; 2460 a->ht_flag = oldmat->ht_flag; 2461 a->ht_fact = oldmat->ht_fact; 2462 a->ht_total_ct = 0; 2463 a->ht_insert_ct = 0; 2464 2465 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2466 if (oldmat->colmap) { 2467 #if defined(PETSC_USE_CTABLE) 2468 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2469 #else 2470 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2471 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2472 #endif 2473 } else a->colmap = NULL; 2474 2475 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2476 PetscCall(PetscMalloc1(len, &a->garray)); 2477 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2478 } else a->garray = NULL; 2479 2480 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2481 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2482 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2483 2484 
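/* duplicate the combined work vectors slvec0 and slvec1, then recreate slvec1a, slvec1b and slvec0b as sequential sub-vectors that alias their arrays: the first bs*mbs entries hold the local part and the remainder holds the ghosted part */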
PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2485 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2486 2487 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2488 PetscCall(VecGetArray(a->slvec1, &array)); 2489 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2490 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2491 PetscCall(VecRestoreArray(a->slvec1, &array)); 2492 PetscCall(VecGetArray(a->slvec0, &array)); 2493 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2494 PetscCall(VecRestoreArray(a->slvec0, &array)); 2495 2496 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2497 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2498 a->sMvctx = oldmat->sMvctx; 2499 2500 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2501 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2502 } 2503 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2504 *newmat = mat; 2505 PetscFunctionReturn(PETSC_SUCCESS); 2506 } 2507 2508 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2509 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2510 2511 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2512 { 2513 PetscBool isbinary; 2514 2515 PetscFunctionBegin; 2516 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2517 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2518 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2519 PetscFunctionReturn(PETSC_SUCCESS); 2520 } 2521 2522 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2523 { 2524 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2525 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2526 PetscReal atmp; 2527 PetscReal *work, *svalues, *rvalues; 2528 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2529 PetscMPIInt rank, size; 2530 PetscInt *rowners_bs, count, source; 2531 PetscScalar *va; 2532 MatScalar *ba; 2533 MPI_Status stat; 2534 2535 PetscFunctionBegin; 2536 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2537 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2538 PetscCall(VecGetArray(v, &va)); 2539 2540 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2541 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2542 2543 bs = A->rmap->bs; 2544 mbs = a->mbs; 2545 Mbs = a->Mbs; 2546 ba = b->a; 2547 bi = b->i; 2548 bj = b->j; 2549 2550 /* find ownerships */ 2551 rowners_bs = A->rmap->range; 2552 2553 /* each proc creates an array to be distributed */ 2554 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2555 2556 /* row_max for B */ 2557 if (rank != size - 1) { 2558 for (i = 0; i < mbs; i++) { 2559 ncols = bi[1] - bi[0]; 2560 bi++; 2561 brow = bs * i; 2562 for (j = 0; j < ncols; j++) { 2563 bcol = bs * (*bj); 2564 for (kcol = 0; kcol < bs; kcol++) { 2565 col = bcol + kcol; /* local col index */ 2566 col += rowners_bs[rank + 1]; /* global col index */ 2567 for (krow = 0; krow < bs; krow++) { 2568 atmp = PetscAbsScalar(*ba); 2569 ba++; 2570 row = brow + krow; /* local row index */ 2571 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2572 if (work[col] < atmp) work[col] = atmp; 2573 } 
2574 } 2575 bj++; 2576 } 2577 } 2578 2579 /* send values to its owners */ 2580 for (PetscMPIInt dest = rank + 1; dest < size; dest++) { 2581 svalues = work + rowners_bs[dest]; 2582 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2583 PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2584 } 2585 } 2586 2587 /* receive values */ 2588 if (rank) { 2589 rvalues = work; 2590 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2591 for (source = 0; source < rank; source++) { 2592 PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2593 /* process values */ 2594 for (i = 0; i < count; i++) { 2595 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2596 } 2597 } 2598 } 2599 2600 PetscCall(VecRestoreArray(v, &va)); 2601 PetscCall(PetscFree(work)); 2602 PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2606 { 2607 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2608 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2609 PetscScalar *x, *ptr, *from; 2610 Vec bb1; 2611 const PetscScalar *b; 2612 2613 PetscFunctionBegin; 2614 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2615 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2616 2617 if (flag == SOR_APPLY_UPPER) { 2618 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2619 PetscFunctionReturn(PETSC_SUCCESS); 2620 } 2621 2622 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2623 if (flag & SOR_ZERO_INITIAL_GUESS) { 2624 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2625 its--; 2626 } 2627 2628 PetscCall(VecDuplicate(bb, &bb1)); 2629 while (its--) { 2630 /* lower triangular part: slvec0b = - B^T*xx */ 2631 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2632 2633 /* copy xx into slvec0a */ 2634 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2635 PetscCall(VecGetArray(xx, &x)); 2636 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2637 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2638 2639 PetscCall(VecScale(mat->slvec0, -1.0)); 2640 2641 /* copy bb into slvec1a */ 2642 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2643 PetscCall(VecGetArrayRead(bb, &b)); 2644 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2645 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2646 2647 /* set slvec1b = 0 */ 2648 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2649 PetscCall(VecZeroEntries(mat->slvec1b)); 2650 2651 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2652 PetscCall(VecRestoreArray(xx, &x)); 2653 PetscCall(VecRestoreArrayRead(bb, &b)); 2654 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2655 2656 /* upper triangular part: bb1 = bb1 - B*x */ 2657 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2658 2659 /* local diagonal sweep */ 2660 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2661 } 2662 PetscCall(VecDestroy(&bb1)); 2663 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2664 
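/* a single process-local forward sweep with a zero initial guess uses only the local diagonal block */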
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2665 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2666 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2667 } else if (flag & SOR_EISENSTAT) { 2668 Vec xx1; 2669 PetscBool hasop; 2670 const PetscScalar *diag; 2671 PetscScalar *sl, scale = (omega - 2.0) / omega; 2672 PetscInt i, n; 2673 2674 if (!mat->xx1) { 2675 PetscCall(VecDuplicate(bb, &mat->xx1)); 2676 PetscCall(VecDuplicate(bb, &mat->bb1)); 2677 } 2678 xx1 = mat->xx1; 2679 bb1 = mat->bb1; 2680 2681 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2682 2683 if (!mat->diag) { 2684 /* this is wrong for same matrix with new nonzero values */ 2685 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2686 PetscCall(MatGetDiagonal(matin, mat->diag)); 2687 } 2688 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2689 2690 if (hasop) { 2691 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2692 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2693 } else { 2694 /* 2695 These two lines are replaced by code that may be a bit faster for a good compiler 2696 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2697 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2698 */ 2699 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2700 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2701 PetscCall(VecGetArrayRead(bb, &b)); 2702 PetscCall(VecGetArray(xx, &x)); 2703 PetscCall(VecGetLocalSize(xx, &n)); 2704 if (omega == 1.0) { 2705 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2706 PetscCall(PetscLogFlops(2.0 * n)); 2707 } else { 2708 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2709 PetscCall(PetscLogFlops(3.0 * n)); 2710 } 2711 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2712 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2713 PetscCall(VecRestoreArrayRead(bb, &b)); 2714 PetscCall(VecRestoreArray(xx, &x)); 2715 } 2716 2717 /* multiply off-diagonal portion of matrix */ 2718 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2719 PetscCall(VecZeroEntries(mat->slvec1b)); 2720 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2721 PetscCall(VecGetArray(mat->slvec0, &from)); 2722 PetscCall(VecGetArray(xx, &x)); 2723 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2724 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2725 PetscCall(VecRestoreArray(xx, &x)); 2726 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2727 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2728 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2729 2730 /* local sweep */ 2731 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2732 PetscCall(VecAXPY(xx, 1.0, xx1)); 2733 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2734 PetscFunctionReturn(PETSC_SUCCESS); 2735 } 2736 2737 /*@ 2738 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2739 2740 Collective 2741 2742 Input Parameters: 2743 + comm - MPI communicator 2744 . bs - the block size, only a block size of 1 is supported 2745 . 
m - number of local rows (Cannot be `PETSC_DECIDE`) 2746 . n - This value should be the same as the local size used in creating the 2747 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2748 calculated if `N` is given) For square matrices `n` is almost always `m`. 2749 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2750 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2751 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2752 . j - column indices 2753 - a - matrix values 2754 2755 Output Parameter: 2756 . mat - the matrix 2757 2758 Level: intermediate 2759 2760 Notes: 2761 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2762 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2763 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2764 2765 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2766 2767 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2768 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2769 @*/ 2770 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2771 { 2772 PetscFunctionBegin; 2773 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2774 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2775 PetscCall(MatCreate(comm, mat)); 2776 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2777 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2778 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2779 PetscFunctionReturn(PETSC_SUCCESS); 2780 } 2781 2782 /*@ 2783 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2784 2785 Collective 2786 2787 Input Parameters: 2788 + B - the matrix 2789 . bs - the block size 2790 . i - the indices into `j` for the start of each local row (indices start with zero) 2791 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2792 - v - optional values in the matrix, pass `NULL` if not provided 2793 2794 Level: advanced 2795 2796 Notes: 2797 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2798 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2799 called this routine. 
2800 2801 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2802 and usually the numerical values as well 2803 2804 Any entries passed in that are below the diagonal are ignored 2805 2806 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2807 `MatCreateMPISBAIJWithArrays()` 2808 @*/ 2809 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2810 { 2811 PetscFunctionBegin; 2812 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2813 PetscFunctionReturn(PETSC_SUCCESS); 2814 } 2815 2816 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2817 { 2818 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2819 PetscInt *indx; 2820 PetscScalar *values; 2821 2822 PetscFunctionBegin; 2823 PetscCall(MatGetSize(inmat, &m, &N)); 2824 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2825 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2826 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2827 PetscInt *bindx, rmax = a->rmax, j; 2828 PetscMPIInt rank, size; 2829 2830 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2831 mbs = m / bs; 2832 Nbs = N / cbs; 2833 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2834 nbs = n / cbs; 2835 2836 PetscCall(PetscMalloc1(rmax, &bindx)); 2837 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2838 2839 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2840 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2841 if (rank == size - 1) { 2842 /* Check sum(nbs) = Nbs */ 2843 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2844 } 2845 2846 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2847 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2848 for (i = 0; i < mbs; i++) { 2849 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2850 nnz = nnz / bs; 2851 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2852 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2853 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2854 } 2855 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2856 PetscCall(PetscFree(bindx)); 2857 2858 PetscCall(MatCreate(comm, outmat)); 2859 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2860 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2861 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2862 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2863 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2864 MatPreallocateEnd(dnz, onz); 2865 } 2866 2867 /* numeric phase */ 2868 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2869 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2870 2871 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2872 for (i = 0; i < m; i++) { 2873 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2874 Ii = i + rstart; 2875 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 
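/* MAT_GETROW_UPPERTRIANGULAR is set above, so only the stored upper triangular part of each row is retrieved and inserted at global row Ii */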
2876 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2877 } 2878 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2879 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2880 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2881 PetscFunctionReturn(PETSC_SUCCESS); 2882 } 2883
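/*
   Usage sketch (illustrative only, not part of the PETSc sources or test suite): assemble a 1D
   Laplacian as a MATMPISBAIJ matrix with block size 1 via MatCreateSBAIJ(). Only entries on or
   above the diagonal are set, since the symmetric format stores only the upper triangle; the
   global size n and the preallocation counts (2 diagonal blocks and 1 off-diagonal block per row)
   are assumptions made for this example. The MatCreate()/MatSetType()/MatMPISBAIJSetPreallocation()
   paradigm documented above may be used instead of MatCreateSBAIJ().

.vb
   Mat      A;
   PetscInt n = 100, i, rstart, rend;

   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 1, PETSC_DECIDE, PETSC_DECIDE, n, n, 2, NULL, 1, NULL, &A));
   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
   for (i = rstart; i < rend; i++) {
     PetscScalar v[2]    = {2.0, -1.0};
     PetscInt    cols[2] = {i, i + 1};

     // set the diagonal entry and, when it exists, the superdiagonal entry; entries below the
     // diagonal are not set because only the upper triangle is stored
     PetscCall(MatSetValues(A, 1, &i, (i + 1 < n) ? 2 : 1, cols, v, INSERT_VALUES));
   }
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatDestroy(&A));
.ve
*/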