1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 PetscCall(MatRestoreRowUpperTriangular(A)); 141 } else { 142 PetscCall(MatSetUp(B)); 143 } 144 } else { 145 B = *newmat; 146 PetscCall(MatZeroEntries(B)); 147 } 148 149 PetscCall(MatGetRowUpperTriangular(A)); 150 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 151 PetscInt ncols; 152 const PetscInt *row; 153 const PetscScalar *vals; 154 155 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 156 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 157 #if defined(PETSC_USE_COMPLEX) 158 if (A->hermitian == PETSC_BOOL3_TRUE) { 159 PetscInt i; 160 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 161 } else { 162 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 163 } 164 #else 165 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 166 #endif 167 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 168 } 169 PetscCall(MatRestoreRowUpperTriangular(A)); 170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 172 173 if (reuse == MAT_INPLACE_MATRIX) { 174 PetscCall(MatHeaderReplace(A, &B)); 175 } else { 176 *newmat = B; 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 182 { 183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 184 185 PetscFunctionBegin; 186 PetscCall(MatStoreValues(aij->A)); 187 PetscCall(MatStoreValues(aij->B)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 192 { 193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 194 195 PetscFunctionBegin; 196 PetscCall(MatRetrieveValues(aij->A)); 197 PetscCall(MatRetrieveValues(aij->B)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 202 do { \ 203 brow = row / bs; \ 204 rp = aj + ai[brow]; \ 205 ap = aa + bs2 * ai[brow]; \ 206 rmax = aimax[brow]; \ 207 nrow = ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 A->nonzerostate++; \ 238 a_noinsert:; \ 239 ailen[brow] = nrow; \ 240 } while (0) 241 242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 243 do { \ 244 brow = row / bs; \ 245 rp = bj + bi[brow]; \ 246 ap = ba + bs2 * bi[brow]; \ 247 rmax = bimax[brow]; \ 248 nrow = bilen[brow]; \ 249 bcol = col / bs; \ 250 ridx = row % bs; \ 251 cidx = col % bs; \ 252 low = 0; \ 253 high = nrow; \ 254 while (high - low > 3) { \ 255 t = (low + high) / 2; \ 256 if (rp[t] > bcol) high = t; \ 257 else low = t; \ 258 } \ 259 for (_i = low; _i < high; _i++) { \ 260 if (rp[_i] > bcol) break; \ 261 if (rp[_i] == bcol) { \ 262 bap = ap + bs2 * _i + bs * cidx + ridx; \ 263 if (addv == ADD_VALUES) *bap += value; \ 264 else *bap = value; \ 265 goto b_noinsert; \ 266 } \ 267 } \ 268 if (b->nonew == 1) goto b_noinsert; \ 269 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 270 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 271 N = nrow++ - 1; \ 272 /* shift up all the later entries in this row */ \ 273 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 274 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 275 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 276 rp[_i] = bcol; \ 277 ap[bs2 * _i + bs * cidx + ridx] = value; \ 278 B->nonzerostate++; \ 279 b_noinsert:; \ 280 bilen[brow] = nrow; \ 281 } while (0) 282 283 /* Only add/insert a(i,j) with i<=j (blocks). 284 Any a(i,j) with i>j input by user is ignored or generates an error 285 */ 286 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 287 { 288 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 289 MatScalar value; 290 PetscBool roworiented = baij->roworiented; 291 PetscInt i, j, row, col; 292 PetscInt rstart_orig = mat->rmap->rstart; 293 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 294 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 295 296 /* Some Variables required in the macro */ 297 Mat A = baij->A; 298 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 299 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 300 MatScalar *aa = a->a; 301 302 Mat B = baij->B; 303 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 304 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 305 MatScalar *ba = b->a; 306 307 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 308 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 309 MatScalar *ap, *bap; 310 311 /* for stash */ 312 PetscInt n_loc, *in_loc = NULL; 313 MatScalar *v_loc = NULL; 314 315 PetscFunctionBegin; 316 if (!baij->donotstash) { 317 if (n > baij->n_loc) { 318 PetscCall(PetscFree(baij->in_loc)); 319 PetscCall(PetscFree(baij->v_loc)); 320 PetscCall(PetscMalloc1(n, &baij->in_loc)); 321 PetscCall(PetscMalloc1(n, &baij->v_loc)); 322 323 baij->n_loc = n; 324 } 325 in_loc = baij->in_loc; 326 v_loc = baij->v_loc; 327 } 328 329 for (i = 0; i < m; i++) { 330 if (im[i] < 0) continue; 331 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 332 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 333 row = im[i] - rstart_orig; /* local row index */ 334 for (j = 0; j < n; j++) { 335 if (im[i] / bs > in[j] / bs) { 336 if (a->ignore_ltriangular) { 337 continue; /* ignore lower triangular blocks */ 338 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 339 } 340 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 341 col = in[j] - cstart_orig; /* local col index */ 342 brow = row / bs; 343 bcol = col / bs; 344 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 345 if (roworiented) value = v[i * n + j]; 346 else value = v[i + j * m]; 347 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 348 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 349 } else if (in[j] < 0) { 350 continue; 351 } else { 352 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 353 /* off-diag entry (B) */ 354 if (mat->was_assembled) { 355 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 356 #if defined(PETSC_USE_CTABLE) 357 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 358 col = col - 1; 359 #else 360 col = baij->colmap[in[j] / bs] - 1; 361 #endif 362 if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) { 363 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 364 col = in[j]; 365 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 366 B = baij->B; 367 b = (Mat_SeqBAIJ *)(B)->data; 368 bimax = b->imax; 369 bi = b->i; 370 bilen = b->ilen; 371 bj = b->j; 372 ba = b->a; 373 } else col += in[j] % bs; 374 } else col = in[j]; 375 if (roworiented) value = v[i * n + j]; 376 else value = v[i + j * m]; 377 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 378 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 379 } 380 } 381 } else { /* off processor entry */ 382 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 383 if (!baij->donotstash) { 384 mat->assembled = PETSC_FALSE; 385 n_loc = 0; 386 for (j = 0; j < n; j++) { 387 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 388 in_loc[n_loc] = in[j]; 389 if (roworiented) { 390 v_loc[n_loc] = v[i * n + j]; 391 } else { 392 v_loc[n_loc] = v[j * m + i]; 393 } 394 n_loc++; 395 } 396 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 397 } 398 } 399 } 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 404 { 405 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 406 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 407 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 408 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 409 PetscBool roworiented = a->roworiented; 410 const PetscScalar *value = v; 411 MatScalar *ap, *aa = a->a, *bap; 412 413 PetscFunctionBegin; 414 if (col < row) { 415 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 416 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 417 } 418 rp = aj + ai[row]; 419 ap = aa + bs2 * ai[row]; 420 rmax = imax[row]; 421 nrow = ailen[row]; 422 value = v; 423 low = 0; 424 high = nrow; 425 426 while (high - low > 7) { 427 t = (low + high) / 2; 428 if (rp[t] > col) high = t; 429 else low = t; 430 } 431 for (i = low; i < high; i++) { 432 if (rp[i] > col) break; 433 if (rp[i] == col) { 434 bap = ap + bs2 * i; 435 if (roworiented) { 436 if (is == ADD_VALUES) { 437 for (ii = 0; ii < bs; ii++) { 438 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 439 } 440 } else { 441 for (ii = 0; ii < bs; ii++) { 442 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 443 } 444 } 445 } else { 446 if (is == ADD_VALUES) { 447 for (ii = 0; ii < bs; ii++) { 448 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 449 } 450 } else { 451 for (ii = 0; ii < bs; ii++) { 452 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 453 } 454 } 455 } 456 goto noinsert2; 457 } 458 } 459 if (nonew == 1) goto noinsert2; 460 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 461 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 462 N = nrow++ - 1; 463 high++; 464 /* shift up all the later entries in this row */ 465 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 466 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 467 rp[i] = col; 468 bap = ap + bs2 * i; 469 if (roworiented) { 470 for (ii = 0; ii < bs; ii++) { 471 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 472 } 473 } else { 474 for (ii = 0; ii < bs; ii++) { 475 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 476 } 477 } 478 noinsert2:; 479 ailen[row] = nrow; 480 PetscFunctionReturn(PETSC_SUCCESS); 481 } 482 483 /* 484 This routine is exactly duplicated in mpibaij.c 485 */ 486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 487 { 488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 489 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 490 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 491 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 492 PetscBool roworiented = a->roworiented; 493 const PetscScalar *value = v; 494 MatScalar *ap, *aa = a->a, *bap; 495 496 PetscFunctionBegin; 497 rp = aj + ai[row]; 498 ap = aa + bs2 * ai[row]; 499 rmax = imax[row]; 500 nrow = ailen[row]; 501 low = 0; 502 high = nrow; 503 value = v; 504 while (high - low > 7) { 505 t = (low + high) / 2; 506 if (rp[t] > col) high = t; 507 else low = t; 508 } 509 for (i = low; i < high; i++) { 510 if (rp[i] > col) break; 511 if (rp[i] == col) { 512 bap = ap + bs2 * i; 513 if (roworiented) { 514 if (is == ADD_VALUES) { 515 for (ii = 0; ii < bs; ii++) { 516 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 517 } 518 } else { 519 for (ii = 0; ii < bs; ii++) { 520 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 521 } 522 } 523 } else { 524 if (is == ADD_VALUES) { 525 for (ii = 0; ii < bs; ii++, value += bs) { 526 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 527 bap += bs; 528 } 529 } else { 530 for (ii = 0; ii < bs; ii++, value += bs) { 531 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 532 bap += bs; 533 } 534 } 535 } 536 goto noinsert2; 537 } 538 } 539 if (nonew == 1) goto noinsert2; 540 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 541 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 542 N = nrow++ - 1; 543 high++; 544 /* shift up all the later entries in this row */ 545 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 546 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 547 rp[i] = col; 548 bap = ap + bs2 * i; 549 if (roworiented) { 550 for (ii = 0; ii < bs; ii++) { 551 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 552 } 553 } else { 554 for (ii = 0; ii < bs; ii++) { 555 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 556 } 557 } 558 noinsert2:; 559 ailen[row] = nrow; 560 PetscFunctionReturn(PETSC_SUCCESS); 561 } 562 563 /* 564 This routine could be optimized by removing the need for the block copy below and passing stride information 565 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 566 */ 567 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 568 { 569 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 570 const MatScalar *value; 571 MatScalar *barray = baij->barray; 572 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 573 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 574 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 575 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 576 577 PetscFunctionBegin; 578 if (!barray) { 579 PetscCall(PetscMalloc1(bs2, &barray)); 580 baij->barray = barray; 581 } 582 583 if (roworiented) { 584 stepval = (n - 1) * bs; 585 } else { 586 stepval = (m - 1) * bs; 587 } 588 for (i = 0; i < m; i++) { 589 if (im[i] < 0) continue; 590 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 591 if (im[i] >= rstart && im[i] < rend) { 592 row = im[i] - rstart; 593 for (j = 0; j < n; j++) { 594 if (im[i] > in[j]) { 595 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 596 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 597 } 598 /* If NumCol = 1 then a copy is not required */ 599 if ((roworiented) && (n == 1)) { 600 barray = (MatScalar *)v + i * bs2; 601 } else if ((!roworiented) && (m == 1)) { 602 barray = (MatScalar *)v + j * bs2; 603 } else { /* Here a copy is required */ 604 if (roworiented) { 605 value = v + i * (stepval + bs) * bs + j * bs; 606 } else { 607 value = v + j * (stepval + bs) * bs + i * bs; 608 } 609 for (ii = 0; ii < bs; ii++, value += stepval) { 610 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 611 } 612 barray -= bs2; 613 } 614 615 if (in[j] >= cstart && in[j] < cend) { 616 col = in[j] - cstart; 617 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 618 } else if (in[j] < 0) { 619 continue; 620 } else { 621 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 622 if (mat->was_assembled) { 623 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 624 625 #if defined(PETSC_USE_CTABLE) 626 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 627 col = col < 1 ? -1 : (col - 1) / bs; 628 #else 629 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 630 #endif 631 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 632 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 633 col = in[j]; 634 } 635 } else col = in[j]; 636 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 637 } 638 } 639 } else { 640 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 641 if (!baij->donotstash) { 642 if (roworiented) { 643 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 644 } else { 645 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 646 } 647 } 648 } 649 } 650 PetscFunctionReturn(PETSC_SUCCESS); 651 } 652 653 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 654 { 655 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 656 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 657 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 658 659 PetscFunctionBegin; 660 for (i = 0; i < m; i++) { 661 if (idxm[i] < 0) continue; /* negative row */ 662 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 663 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 664 row = idxm[i] - bsrstart; 665 for (j = 0; j < n; j++) { 666 if (idxn[j] < 0) continue; /* negative column */ 667 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 668 if (idxn[j] >= bscstart && idxn[j] < bscend) { 669 col = idxn[j] - bscstart; 670 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 671 } else { 672 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 673 #if defined(PETSC_USE_CTABLE) 674 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 675 data--; 676 #else 677 data = baij->colmap[idxn[j] / bs] - 1; 678 #endif 679 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 680 else { 681 col = data + idxn[j] % bs; 682 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 683 } 684 } 685 } 686 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 687 } 688 PetscFunctionReturn(PETSC_SUCCESS); 689 } 690 691 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 692 { 693 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 694 PetscReal sum[2], *lnorm2; 695 696 PetscFunctionBegin; 697 if (baij->size == 1) { 698 PetscCall(MatNorm(baij->A, type, norm)); 699 } else { 700 if (type == NORM_FROBENIUS) { 701 PetscCall(PetscMalloc1(2, &lnorm2)); 702 PetscCall(MatNorm(baij->A, type, lnorm2)); 703 *lnorm2 = (*lnorm2) * (*lnorm2); 704 lnorm2++; /* squar power of norm(A) */ 705 PetscCall(MatNorm(baij->B, type, lnorm2)); 706 *lnorm2 = (*lnorm2) * (*lnorm2); 707 lnorm2--; /* squar power of norm(B) */ 708 PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 709 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 710 PetscCall(PetscFree(lnorm2)); 711 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 712 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 713 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 714 PetscReal *rsum, *rsum2, vabs; 715 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 716 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 717 MatScalar *v; 718 719 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 720 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 721 /* Amat */ 722 v = amat->a; 723 jj = amat->j; 724 for (brow = 0; brow < mbs; brow++) { 725 grow = bs * (rstart + brow); 726 nz = amat->i[brow + 1] - amat->i[brow]; 727 for (bcol = 0; bcol < nz; bcol++) { 728 gcol = bs * (rstart + *jj); 729 jj++; 730 for (col = 0; col < bs; col++) { 731 for (row = 0; row < bs; row++) { 732 vabs = PetscAbsScalar(*v); 733 v++; 734 rsum[gcol + col] += vabs; 735 /* non-diagonal block */ 736 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 737 } 738 } 739 } 740 PetscCall(PetscLogFlops(nz * bs * bs)); 741 } 742 /* Bmat */ 743 v = bmat->a; 744 jj = bmat->j; 745 for (brow = 0; brow < mbs; brow++) { 746 grow = bs * (rstart + brow); 747 nz = bmat->i[brow + 1] - bmat->i[brow]; 748 for (bcol = 0; bcol < nz; bcol++) { 749 gcol = bs * garray[*jj]; 750 jj++; 751 for (col = 0; col < bs; col++) { 752 for (row = 0; row < bs; row++) { 753 vabs = PetscAbsScalar(*v); 754 v++; 755 rsum[gcol + col] += vabs; 756 rsum[grow + row] += vabs; 757 } 758 } 759 } 760 PetscCall(PetscLogFlops(nz * bs * bs)); 761 } 762 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 763 *norm = 0.0; 764 for (col = 0; col < mat->cmap->N; col++) { 765 if (rsum2[col] > *norm) *norm = rsum2[col]; 766 } 767 PetscCall(PetscFree2(rsum, rsum2)); 768 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 769 } 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 774 { 775 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 776 PetscInt nstash, reallocs; 777 778 PetscFunctionBegin; 779 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 780 781 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 782 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 783 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 784 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 785 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 786 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 791 { 792 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 793 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 794 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 795 PetscInt *row, *col; 796 PetscBool other_disassembled; 797 PetscMPIInt n; 798 PetscBool r1, r2, r3; 799 MatScalar *val; 800 801 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 802 PetscFunctionBegin; 803 if (!baij->donotstash && !mat->nooffprocentries) { 804 while (1) { 805 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 806 if (!flg) break; 807 808 for (i = 0; i < n;) { 809 /* Now identify the consecutive vals belonging to the same row */ 810 for (j = i, rstart = row[j]; j < n; j++) { 811 if (row[j] != rstart) break; 812 } 813 if (j < n) ncols = j - i; 814 else ncols = n - i; 815 /* Now assemble all these values with a single function call */ 816 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 817 i = j; 818 } 819 } 820 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 821 /* Now process the block-stash. Since the values are stashed column-oriented, 822 set the row-oriented flag to column-oriented, and after MatSetValues() 823 restore the original flags */ 824 r1 = baij->roworiented; 825 r2 = a->roworiented; 826 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 827 828 baij->roworiented = PETSC_FALSE; 829 a->roworiented = PETSC_FALSE; 830 831 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 832 while (1) { 833 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 834 if (!flg) break; 835 836 for (i = 0; i < n;) { 837 /* Now identify the consecutive vals belonging to the same row */ 838 for (j = i, rstart = row[j]; j < n; j++) { 839 if (row[j] != rstart) break; 840 } 841 if (j < n) ncols = j - i; 842 else ncols = n - i; 843 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 844 i = j; 845 } 846 } 847 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 848 849 baij->roworiented = r1; 850 a->roworiented = r2; 851 852 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 853 } 854 855 PetscCall(MatAssemblyBegin(baij->A, mode)); 856 PetscCall(MatAssemblyEnd(baij->A, mode)); 857 858 /* determine if any processor has disassembled, if so we must 859 also disassemble ourselves, in order that we may reassemble. */ 860 /* 861 if nonzero structure of submatrix B cannot change then we know that 862 no processor disassembled thus we can skip this stuff 863 */ 864 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 865 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 866 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 867 } 868 869 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 870 PetscCall(MatAssemblyBegin(baij->B, mode)); 871 PetscCall(MatAssemblyEnd(baij->B, mode)); 872 873 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 874 875 baij->rowvalues = NULL; 876 877 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 878 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 879 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 880 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 881 } 882 PetscFunctionReturn(PETSC_SUCCESS); 883 } 884 885 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 886 #include <petscdraw.h> 887 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 888 { 889 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 890 PetscInt bs = mat->rmap->bs; 891 PetscMPIInt rank = baij->rank; 892 PetscBool iascii, isdraw; 893 PetscViewer sviewer; 894 PetscViewerFormat format; 895 896 PetscFunctionBegin; 897 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 898 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 899 if (iascii) { 900 PetscCall(PetscViewerGetFormat(viewer, &format)); 901 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 902 MatInfo info; 903 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 904 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 905 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 906 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 907 mat->rmap->bs, (double)info.memory)); 908 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 909 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 910 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 911 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 912 PetscCall(PetscViewerFlush(viewer)); 913 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 914 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 915 PetscCall(VecScatterView(baij->Mvctx, viewer)); 916 PetscFunctionReturn(PETSC_SUCCESS); 917 } else if (format == PETSC_VIEWER_ASCII_INFO) { 918 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 919 PetscFunctionReturn(PETSC_SUCCESS); 920 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 } 924 925 if (isdraw) { 926 PetscDraw draw; 927 PetscBool isnull; 928 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 929 PetscCall(PetscDrawIsNull(draw, &isnull)); 930 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 931 } 932 933 { 934 /* assemble the entire matrix onto first processor. */ 935 Mat A; 936 Mat_SeqSBAIJ *Aloc; 937 Mat_SeqBAIJ *Bloc; 938 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 939 MatScalar *a; 940 const char *matname; 941 942 /* Should this be the same type as mat? */ 943 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 944 if (rank == 0) { 945 PetscCall(MatSetSizes(A, M, N, M, N)); 946 } else { 947 PetscCall(MatSetSizes(A, 0, 0, M, N)); 948 } 949 PetscCall(MatSetType(A, MATMPISBAIJ)); 950 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 951 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 952 953 /* copy over the A part */ 954 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 955 ai = Aloc->i; 956 aj = Aloc->j; 957 a = Aloc->a; 958 PetscCall(PetscMalloc1(bs, &rvals)); 959 960 for (i = 0; i < mbs; i++) { 961 rvals[0] = bs * (baij->rstartbs + i); 962 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 963 for (j = ai[i]; j < ai[i + 1]; j++) { 964 col = (baij->cstartbs + aj[j]) * bs; 965 for (k = 0; k < bs; k++) { 966 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 967 col++; 968 a += bs; 969 } 970 } 971 } 972 /* copy over the B part */ 973 Bloc = (Mat_SeqBAIJ *)baij->B->data; 974 ai = Bloc->i; 975 aj = Bloc->j; 976 a = Bloc->a; 977 for (i = 0; i < mbs; i++) { 978 rvals[0] = bs * (baij->rstartbs + i); 979 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 980 for (j = ai[i]; j < ai[i + 1]; j++) { 981 col = baij->garray[aj[j]] * bs; 982 for (k = 0; k < bs; k++) { 983 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 984 col++; 985 a += bs; 986 } 987 } 988 } 989 PetscCall(PetscFree(rvals)); 990 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 991 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 992 /* 993 Everyone has to call to draw the matrix since the graphics waits are 994 synchronized across all processors that share the PetscDraw object 995 */ 996 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 997 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 998 if (rank == 0) { 999 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname)); 1000 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer)); 1001 } 1002 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1003 PetscCall(MatDestroy(&A)); 1004 } 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1009 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1010 1011 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1012 { 1013 PetscBool iascii, isdraw, issocket, isbinary; 1014 1015 PetscFunctionBegin; 1016 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1020 if (iascii || isdraw || issocket) { 1021 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 1022 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 1023 PetscFunctionReturn(PETSC_SUCCESS); 1024 } 1025 1026 #if defined(PETSC_USE_COMPLEX) 1027 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1028 { 1029 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1030 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1031 PetscScalar *from; 1032 const PetscScalar *x; 1033 1034 PetscFunctionBegin; 1035 /* diagonal part */ 1036 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1037 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1038 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1039 PetscCall(VecZeroEntries(a->slvec1b)); 1040 1041 /* subdiagonal part */ 1042 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1043 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1044 1045 /* copy x into the vec slvec0 */ 1046 PetscCall(VecGetArray(a->slvec0, &from)); 1047 PetscCall(VecGetArrayRead(xx, &x)); 1048 1049 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1050 PetscCall(VecRestoreArray(a->slvec0, &from)); 1051 PetscCall(VecRestoreArrayRead(xx, &x)); 1052 1053 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1054 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1055 /* supperdiagonal part */ 1056 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1057 PetscFunctionReturn(PETSC_SUCCESS); 1058 } 1059 #endif 1060 1061 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1062 { 1063 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1064 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1065 PetscScalar *from; 1066 const PetscScalar *x; 1067 1068 PetscFunctionBegin; 1069 /* diagonal part */ 1070 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1071 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1072 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1073 PetscCall(VecZeroEntries(a->slvec1b)); 1074 1075 /* subdiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1077 1078 /* copy x into the vec slvec0 */ 1079 PetscCall(VecGetArray(a->slvec0, &from)); 1080 PetscCall(VecGetArrayRead(xx, &x)); 1081 1082 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1083 PetscCall(VecRestoreArray(a->slvec0, &from)); 1084 PetscCall(VecRestoreArrayRead(xx, &x)); 1085 1086 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1087 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1088 /* supperdiagonal part */ 1089 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1090 PetscFunctionReturn(PETSC_SUCCESS); 1091 } 1092 1093 #if PetscDefined(USE_COMPLEX) 1094 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1095 { 1096 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1097 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1098 PetscScalar *from; 1099 const PetscScalar *x; 1100 1101 PetscFunctionBegin; 1102 /* diagonal part */ 1103 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1104 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1105 PetscCall(VecZeroEntries(a->slvec1b)); 1106 1107 /* subdiagonal part */ 1108 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1109 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1110 1111 /* copy x into the vec slvec0 */ 1112 PetscCall(VecGetArray(a->slvec0, &from)); 1113 PetscCall(VecGetArrayRead(xx, &x)); 1114 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1115 PetscCall(VecRestoreArray(a->slvec0, &from)); 1116 1117 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1118 PetscCall(VecRestoreArrayRead(xx, &x)); 1119 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1120 1121 /* supperdiagonal part */ 1122 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1123 PetscFunctionReturn(PETSC_SUCCESS); 1124 } 1125 #endif 1126 1127 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1128 { 1129 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1130 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1131 PetscScalar *from; 1132 const PetscScalar *x; 1133 1134 PetscFunctionBegin; 1135 /* diagonal part */ 1136 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1137 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1138 PetscCall(VecZeroEntries(a->slvec1b)); 1139 1140 /* subdiagonal part */ 1141 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1142 1143 /* copy x into the vec slvec0 */ 1144 PetscCall(VecGetArray(a->slvec0, &from)); 1145 PetscCall(VecGetArrayRead(xx, &x)); 1146 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1147 PetscCall(VecRestoreArray(a->slvec0, &from)); 1148 1149 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1150 PetscCall(VecRestoreArrayRead(xx, &x)); 1151 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1152 1153 /* supperdiagonal part */ 1154 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 /* 1159 This only works correctly for square matrices where the subblock A->A is the 1160 diagonal block 1161 */ 1162 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1163 { 1164 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1165 1166 PetscFunctionBegin; 1167 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1168 PetscCall(MatGetDiagonal(a->A, v)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1173 { 1174 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1175 1176 PetscFunctionBegin; 1177 PetscCall(MatScale(a->A, aa)); 1178 PetscCall(MatScale(a->B, aa)); 1179 PetscFunctionReturn(PETSC_SUCCESS); 1180 } 1181 1182 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1183 { 1184 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1185 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1186 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1187 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1188 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1189 1190 PetscFunctionBegin; 1191 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1192 mat->getrowactive = PETSC_TRUE; 1193 1194 if (!mat->rowvalues && (idx || v)) { 1195 /* 1196 allocate enough space to hold information from the longest row. 1197 */ 1198 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1199 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1200 PetscInt max = 1, mbs = mat->mbs, tmp; 1201 for (i = 0; i < mbs; i++) { 1202 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1203 if (max < tmp) max = tmp; 1204 } 1205 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1206 } 1207 1208 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1209 lrow = row - brstart; /* local row index */ 1210 1211 pvA = &vworkA; 1212 pcA = &cworkA; 1213 pvB = &vworkB; 1214 pcB = &cworkB; 1215 if (!v) { 1216 pvA = NULL; 1217 pvB = NULL; 1218 } 1219 if (!idx) { 1220 pcA = NULL; 1221 if (!v) pcB = NULL; 1222 } 1223 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1224 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1225 nztot = nzA + nzB; 1226 1227 cmap = mat->garray; 1228 if (v || idx) { 1229 if (nztot) { 1230 /* Sort by increasing column numbers, assuming A and B already sorted */ 1231 PetscInt imark = -1; 1232 if (v) { 1233 *v = v_p = mat->rowvalues; 1234 for (i = 0; i < nzB; i++) { 1235 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1236 else break; 1237 } 1238 imark = i; 1239 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1240 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1241 } 1242 if (idx) { 1243 *idx = idx_p = mat->rowindices; 1244 if (imark > -1) { 1245 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1246 } else { 1247 for (i = 0; i < nzB; i++) { 1248 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1249 else break; 1250 } 1251 imark = i; 1252 } 1253 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1254 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1255 } 1256 } else { 1257 if (idx) *idx = NULL; 1258 if (v) *v = NULL; 1259 } 1260 } 1261 *nz = nztot; 1262 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1263 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 1267 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1268 { 1269 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1270 1271 PetscFunctionBegin; 1272 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1273 baij->getrowactive = PETSC_FALSE; 1274 PetscFunctionReturn(PETSC_SUCCESS); 1275 } 1276 1277 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1278 { 1279 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1280 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1281 1282 PetscFunctionBegin; 1283 aA->getrow_utriangular = PETSC_TRUE; 1284 PetscFunctionReturn(PETSC_SUCCESS); 1285 } 1286 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1287 { 1288 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1289 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1290 1291 PetscFunctionBegin; 1292 aA->getrow_utriangular = PETSC_FALSE; 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1297 { 1298 PetscFunctionBegin; 1299 if (PetscDefined(USE_COMPLEX)) { 1300 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1301 1302 PetscCall(MatConjugate(a->A)); 1303 PetscCall(MatConjugate(a->B)); 1304 } 1305 PetscFunctionReturn(PETSC_SUCCESS); 1306 } 1307 1308 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1309 { 1310 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1311 1312 PetscFunctionBegin; 1313 PetscCall(MatRealPart(a->A)); 1314 PetscCall(MatRealPart(a->B)); 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 1318 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1319 { 1320 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1321 1322 PetscFunctionBegin; 1323 PetscCall(MatImaginaryPart(a->A)); 1324 PetscCall(MatImaginaryPart(a->B)); 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1329 Input: isrow - distributed(parallel), 1330 iscol_local - locally owned (seq) 1331 */ 1332 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1333 { 1334 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1335 const PetscInt *ptr1, *ptr2; 1336 1337 PetscFunctionBegin; 1338 *flg = PETSC_FALSE; 1339 PetscCall(ISGetLocalSize(isrow, &sz1)); 1340 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1341 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); 1342 1343 PetscCall(ISGetIndices(isrow, &ptr1)); 1344 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1345 1346 PetscCall(PetscMalloc1(sz1, &a1)); 1347 PetscCall(PetscMalloc1(sz2, &a2)); 1348 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1349 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1350 PetscCall(PetscSortInt(sz1, a1)); 1351 PetscCall(PetscSortInt(sz2, a2)); 1352 1353 nmatch = 0; 1354 k = 0; 1355 for (i = 0; i < sz1; i++) { 1356 for (j = k; j < sz2; j++) { 1357 if (a1[i] == a2[j]) { 1358 k = j; 1359 nmatch++; 1360 break; 1361 } 1362 } 1363 } 1364 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1365 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1366 PetscCall(PetscFree(a1)); 1367 PetscCall(PetscFree(a2)); 1368 if (nmatch < sz1) { 1369 *flg = PETSC_FALSE; 1370 } else { 1371 *flg = PETSC_TRUE; 1372 } 1373 PetscFunctionReturn(PETSC_SUCCESS); 1374 } 1375 1376 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1377 { 1378 Mat C[2]; 1379 IS iscol_local, isrow_local; 1380 PetscInt csize, csize_local, rsize; 1381 PetscBool isequal, issorted, isidentity = PETSC_FALSE; 1382 1383 PetscFunctionBegin; 1384 PetscCall(ISGetLocalSize(iscol, &csize)); 1385 PetscCall(ISGetLocalSize(isrow, &rsize)); 1386 if (call == MAT_REUSE_MATRIX) { 1387 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1388 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1389 } else { 1390 PetscCall(ISAllGather(iscol, &iscol_local)); 1391 PetscCall(ISSorted(iscol_local, &issorted)); 1392 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted"); 1393 } 1394 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1395 if (!isequal) { 1396 PetscCall(ISGetLocalSize(iscol_local, &csize_local)); 1397 isidentity = (PetscBool)(mat->cmap->N == csize_local); 1398 if (!isidentity) { 1399 if (call == MAT_REUSE_MATRIX) { 1400 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local)); 1401 PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1402 } else { 1403 PetscCall(ISAllGather(isrow, &isrow_local)); 1404 PetscCall(ISSorted(isrow_local, &issorted)); 1405 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted"); 1406 } 1407 } 1408 } 1409 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1410 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity))); 1411 if (!isequal && !isidentity) { 1412 if (call == MAT_INITIAL_MATRIX) { 1413 IS intersect; 1414 PetscInt ni; 1415 1416 PetscCall(ISIntersect(isrow_local, iscol_local, &intersect)); 1417 PetscCall(ISGetLocalSize(intersect, &ni)); 1418 PetscCall(ISDestroy(&intersect)); 1419 PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni); 1420 } 1421 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE)); 1422 PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1)); 1423 PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN)); 1424 if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1425 else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat)); 1426 else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1427 PetscCall(MatDestroy(C)); 1428 PetscCall(MatDestroy(C + 1)); 1429 } 1430 if (call == MAT_INITIAL_MATRIX) { 1431 if (!isequal && !isidentity) { 1432 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local)); 1433 PetscCall(ISDestroy(&isrow_local)); 1434 } 1435 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1436 PetscCall(ISDestroy(&iscol_local)); 1437 } 1438 PetscFunctionReturn(PETSC_SUCCESS); 1439 } 1440 1441 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1442 { 1443 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1444 1445 PetscFunctionBegin; 1446 PetscCall(MatZeroEntries(l->A)); 1447 PetscCall(MatZeroEntries(l->B)); 1448 PetscFunctionReturn(PETSC_SUCCESS); 1449 } 1450 1451 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1452 { 1453 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1454 Mat A = a->A, B = a->B; 1455 PetscLogDouble isend[5], irecv[5]; 1456 1457 PetscFunctionBegin; 1458 info->block_size = (PetscReal)matin->rmap->bs; 1459 1460 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1461 1462 isend[0] = info->nz_used; 1463 isend[1] = info->nz_allocated; 1464 isend[2] = info->nz_unneeded; 1465 isend[3] = info->memory; 1466 isend[4] = info->mallocs; 1467 1468 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1469 1470 isend[0] += info->nz_used; 1471 isend[1] += info->nz_allocated; 1472 isend[2] += info->nz_unneeded; 1473 isend[3] += info->memory; 1474 isend[4] += info->mallocs; 1475 if (flag == MAT_LOCAL) { 1476 info->nz_used = isend[0]; 1477 info->nz_allocated = isend[1]; 1478 info->nz_unneeded = isend[2]; 1479 info->memory = isend[3]; 1480 info->mallocs = isend[4]; 1481 } else if (flag == MAT_GLOBAL_MAX) { 1482 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1483 1484 info->nz_used = irecv[0]; 1485 info->nz_allocated = irecv[1]; 1486 info->nz_unneeded = irecv[2]; 1487 info->memory = irecv[3]; 1488 info->mallocs = irecv[4]; 1489 } else if (flag == MAT_GLOBAL_SUM) { 1490 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1491 1492 info->nz_used = irecv[0]; 1493 info->nz_allocated = irecv[1]; 1494 info->nz_unneeded = irecv[2]; 1495 info->memory = irecv[3]; 1496 info->mallocs = irecv[4]; 1497 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1498 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1499 info->fill_ratio_needed = 0; 1500 info->factor_mallocs = 0; 1501 PetscFunctionReturn(PETSC_SUCCESS); 1502 } 1503 1504 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1505 { 1506 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1507 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1508 1509 PetscFunctionBegin; 1510 switch (op) { 1511 case MAT_NEW_NONZERO_LOCATIONS: 1512 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1513 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1514 case MAT_KEEP_NONZERO_PATTERN: 1515 case MAT_SUBMAT_SINGLEIS: 1516 case MAT_NEW_NONZERO_LOCATION_ERR: 1517 MatCheckPreallocated(A, 1); 1518 PetscCall(MatSetOption(a->A, op, flg)); 1519 PetscCall(MatSetOption(a->B, op, flg)); 1520 break; 1521 case MAT_ROW_ORIENTED: 1522 MatCheckPreallocated(A, 1); 1523 a->roworiented = flg; 1524 1525 PetscCall(MatSetOption(a->A, op, flg)); 1526 PetscCall(MatSetOption(a->B, op, flg)); 1527 break; 1528 case MAT_FORCE_DIAGONAL_ENTRIES: 1529 case MAT_SORTED_FULL: 1530 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1531 break; 1532 case MAT_IGNORE_OFF_PROC_ENTRIES: 1533 a->donotstash = flg; 1534 break; 1535 case MAT_USE_HASH_TABLE: 1536 a->ht_flag = flg; 1537 break; 1538 case MAT_HERMITIAN: 1539 MatCheckPreallocated(A, 1); 1540 PetscCall(MatSetOption(a->A, op, flg)); 1541 #if defined(PETSC_USE_COMPLEX) 1542 if (flg) { /* need different mat-vec ops */ 1543 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1544 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1545 A->ops->multtranspose = NULL; 1546 A->ops->multtransposeadd = NULL; 1547 A->symmetric = PETSC_BOOL3_FALSE; 1548 } 1549 #endif 1550 break; 1551 case MAT_SPD: 1552 case MAT_SYMMETRIC: 1553 MatCheckPreallocated(A, 1); 1554 PetscCall(MatSetOption(a->A, op, flg)); 1555 #if defined(PETSC_USE_COMPLEX) 1556 if (flg) { /* restore to use default mat-vec ops */ 1557 A->ops->mult = MatMult_MPISBAIJ; 1558 A->ops->multadd = MatMultAdd_MPISBAIJ; 1559 A->ops->multtranspose = MatMult_MPISBAIJ; 1560 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1561 } 1562 #endif 1563 break; 1564 case MAT_STRUCTURALLY_SYMMETRIC: 1565 MatCheckPreallocated(A, 1); 1566 PetscCall(MatSetOption(a->A, op, flg)); 1567 break; 1568 case MAT_SYMMETRY_ETERNAL: 1569 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1570 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1571 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1572 break; 1573 case MAT_SPD_ETERNAL: 1574 break; 1575 case MAT_IGNORE_LOWER_TRIANGULAR: 1576 aA->ignore_ltriangular = flg; 1577 break; 1578 case MAT_ERROR_LOWER_TRIANGULAR: 1579 aA->ignore_ltriangular = flg; 1580 break; 1581 case MAT_GETROW_UPPERTRIANGULAR: 1582 aA->getrow_utriangular = flg; 1583 break; 1584 default: 1585 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1586 } 1587 PetscFunctionReturn(PETSC_SUCCESS); 1588 } 1589 1590 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1591 { 1592 PetscFunctionBegin; 1593 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1594 if (reuse == MAT_INITIAL_MATRIX) { 1595 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1596 } else if (reuse == MAT_REUSE_MATRIX) { 1597 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1598 } 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1603 { 1604 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1605 Mat a = baij->A, b = baij->B; 1606 PetscInt nv, m, n; 1607 PetscBool flg; 1608 1609 PetscFunctionBegin; 1610 if (ll != rr) { 1611 PetscCall(VecEqual(ll, rr, &flg)); 1612 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1613 } 1614 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1615 1616 PetscCall(MatGetLocalSize(mat, &m, &n)); 1617 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1618 1619 PetscCall(VecGetLocalSize(rr, &nv)); 1620 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1621 1622 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1623 1624 /* left diagonalscale the off-diagonal part */ 1625 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1626 1627 /* scale the diagonal part */ 1628 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1629 1630 /* right diagonalscale the off-diagonal part */ 1631 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1632 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1633 PetscFunctionReturn(PETSC_SUCCESS); 1634 } 1635 1636 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1637 { 1638 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1639 1640 PetscFunctionBegin; 1641 PetscCall(MatSetUnfactored(a->A)); 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1646 1647 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1648 { 1649 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1650 Mat a, b, c, d; 1651 PetscBool flg; 1652 1653 PetscFunctionBegin; 1654 a = matA->A; 1655 b = matA->B; 1656 c = matB->A; 1657 d = matB->B; 1658 1659 PetscCall(MatEqual(a, c, &flg)); 1660 if (flg) PetscCall(MatEqual(b, d, &flg)); 1661 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1666 { 1667 PetscBool isbaij; 1668 1669 PetscFunctionBegin; 1670 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1671 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1672 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1673 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1674 PetscCall(MatGetRowUpperTriangular(A)); 1675 PetscCall(MatCopy_Basic(A, B, str)); 1676 PetscCall(MatRestoreRowUpperTriangular(A)); 1677 } else { 1678 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1679 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1680 1681 PetscCall(MatCopy(a->A, b->A, str)); 1682 PetscCall(MatCopy(a->B, b->B, str)); 1683 } 1684 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1685 PetscFunctionReturn(PETSC_SUCCESS); 1686 } 1687 1688 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1689 { 1690 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1691 PetscBLASInt bnz, one = 1; 1692 Mat_SeqSBAIJ *xa, *ya; 1693 Mat_SeqBAIJ *xb, *yb; 1694 1695 PetscFunctionBegin; 1696 if (str == SAME_NONZERO_PATTERN) { 1697 PetscScalar alpha = a; 1698 xa = (Mat_SeqSBAIJ *)xx->A->data; 1699 ya = (Mat_SeqSBAIJ *)yy->A->data; 1700 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1701 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1702 xb = (Mat_SeqBAIJ *)xx->B->data; 1703 yb = (Mat_SeqBAIJ *)yy->B->data; 1704 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1705 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1706 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1707 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1708 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1709 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1710 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1711 } else { 1712 Mat B; 1713 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1714 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1715 PetscCall(MatGetRowUpperTriangular(X)); 1716 PetscCall(MatGetRowUpperTriangular(Y)); 1717 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1718 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1719 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1720 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1721 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1722 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1723 PetscCall(MatSetType(B, MATMPISBAIJ)); 1724 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1725 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1726 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1727 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1728 PetscCall(MatHeaderMerge(Y, &B)); 1729 PetscCall(PetscFree(nnz_d)); 1730 PetscCall(PetscFree(nnz_o)); 1731 PetscCall(MatRestoreRowUpperTriangular(X)); 1732 PetscCall(MatRestoreRowUpperTriangular(Y)); 1733 } 1734 PetscFunctionReturn(PETSC_SUCCESS); 1735 } 1736 1737 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1738 { 1739 PetscInt i; 1740 PetscBool flg; 1741 1742 PetscFunctionBegin; 1743 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1744 for (i = 0; i < n; i++) { 1745 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1746 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1747 } 1748 PetscFunctionReturn(PETSC_SUCCESS); 1749 } 1750 1751 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1752 { 1753 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1754 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1755 1756 PetscFunctionBegin; 1757 if (!Y->preallocated) { 1758 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1759 } else if (!aij->nz) { 1760 PetscInt nonew = aij->nonew; 1761 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1762 aij->nonew = nonew; 1763 } 1764 PetscCall(MatShift_Basic(Y, a)); 1765 PetscFunctionReturn(PETSC_SUCCESS); 1766 } 1767 1768 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1769 { 1770 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1771 1772 PetscFunctionBegin; 1773 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1774 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1775 if (d) { 1776 PetscInt rstart; 1777 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1778 *d += rstart / A->rmap->bs; 1779 } 1780 PetscFunctionReturn(PETSC_SUCCESS); 1781 } 1782 1783 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1784 { 1785 PetscFunctionBegin; 1786 *a = ((Mat_MPISBAIJ *)A->data)->A; 1787 PetscFunctionReturn(PETSC_SUCCESS); 1788 } 1789 1790 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1791 { 1792 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1793 1794 PetscFunctionBegin; 1795 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1796 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1797 PetscFunctionReturn(PETSC_SUCCESS); 1798 } 1799 1800 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1801 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1802 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1803 1804 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1805 MatGetRow_MPISBAIJ, 1806 MatRestoreRow_MPISBAIJ, 1807 MatMult_MPISBAIJ, 1808 /* 4*/ MatMultAdd_MPISBAIJ, 1809 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1810 MatMultAdd_MPISBAIJ, 1811 NULL, 1812 NULL, 1813 NULL, 1814 /* 10*/ NULL, 1815 NULL, 1816 NULL, 1817 MatSOR_MPISBAIJ, 1818 MatTranspose_MPISBAIJ, 1819 /* 15*/ MatGetInfo_MPISBAIJ, 1820 MatEqual_MPISBAIJ, 1821 MatGetDiagonal_MPISBAIJ, 1822 MatDiagonalScale_MPISBAIJ, 1823 MatNorm_MPISBAIJ, 1824 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1825 MatAssemblyEnd_MPISBAIJ, 1826 MatSetOption_MPISBAIJ, 1827 MatZeroEntries_MPISBAIJ, 1828 /* 24*/ NULL, 1829 NULL, 1830 NULL, 1831 NULL, 1832 NULL, 1833 /* 29*/ MatSetUp_MPI_Hash, 1834 NULL, 1835 NULL, 1836 MatGetDiagonalBlock_MPISBAIJ, 1837 NULL, 1838 /* 34*/ MatDuplicate_MPISBAIJ, 1839 NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 /* 39*/ MatAXPY_MPISBAIJ, 1844 MatCreateSubMatrices_MPISBAIJ, 1845 MatIncreaseOverlap_MPISBAIJ, 1846 MatGetValues_MPISBAIJ, 1847 MatCopy_MPISBAIJ, 1848 /* 44*/ NULL, 1849 MatScale_MPISBAIJ, 1850 MatShift_MPISBAIJ, 1851 NULL, 1852 NULL, 1853 /* 49*/ NULL, 1854 NULL, 1855 NULL, 1856 NULL, 1857 NULL, 1858 /* 54*/ NULL, 1859 NULL, 1860 MatSetUnfactored_MPISBAIJ, 1861 NULL, 1862 MatSetValuesBlocked_MPISBAIJ, 1863 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1864 NULL, 1865 NULL, 1866 NULL, 1867 NULL, 1868 /* 64*/ NULL, 1869 NULL, 1870 NULL, 1871 NULL, 1872 NULL, 1873 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1874 NULL, 1875 MatConvert_MPISBAIJ_Basic, 1876 NULL, 1877 NULL, 1878 /* 74*/ NULL, 1879 NULL, 1880 NULL, 1881 NULL, 1882 NULL, 1883 /* 79*/ NULL, 1884 NULL, 1885 NULL, 1886 NULL, 1887 MatLoad_MPISBAIJ, 1888 /* 84*/ NULL, 1889 NULL, 1890 NULL, 1891 NULL, 1892 NULL, 1893 /* 89*/ NULL, 1894 NULL, 1895 NULL, 1896 NULL, 1897 NULL, 1898 /* 94*/ NULL, 1899 NULL, 1900 NULL, 1901 NULL, 1902 NULL, 1903 /* 99*/ NULL, 1904 NULL, 1905 NULL, 1906 MatConjugate_MPISBAIJ, 1907 NULL, 1908 /*104*/ NULL, 1909 MatRealPart_MPISBAIJ, 1910 MatImaginaryPart_MPISBAIJ, 1911 MatGetRowUpperTriangular_MPISBAIJ, 1912 MatRestoreRowUpperTriangular_MPISBAIJ, 1913 /*109*/ NULL, 1914 NULL, 1915 NULL, 1916 NULL, 1917 MatMissingDiagonal_MPISBAIJ, 1918 /*114*/ NULL, 1919 NULL, 1920 NULL, 1921 NULL, 1922 NULL, 1923 /*119*/ NULL, 1924 NULL, 1925 NULL, 1926 NULL, 1927 NULL, 1928 /*124*/ NULL, 1929 NULL, 1930 NULL, 1931 NULL, 1932 NULL, 1933 /*129*/ NULL, 1934 NULL, 1935 NULL, 1936 NULL, 1937 NULL, 1938 /*134*/ NULL, 1939 NULL, 1940 NULL, 1941 NULL, 1942 NULL, 1943 /*139*/ MatSetBlockSizes_Default, 1944 NULL, 1945 NULL, 1946 NULL, 1947 NULL, 1948 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1949 NULL, 1950 NULL, 1951 NULL, 1952 NULL, 1953 NULL, 1954 /*150*/ NULL, 1955 MatEliminateZeros_MPISBAIJ, 1956 NULL}; 1957 1958 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1959 { 1960 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1961 PetscInt i, mbs, Mbs; 1962 PetscMPIInt size; 1963 1964 PetscFunctionBegin; 1965 if (B->hash_active) { 1966 B->ops[0] = b->cops; 1967 B->hash_active = PETSC_FALSE; 1968 } 1969 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1970 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1971 PetscCall(PetscLayoutSetUp(B->rmap)); 1972 PetscCall(PetscLayoutSetUp(B->cmap)); 1973 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1974 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1975 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1976 1977 mbs = B->rmap->n / bs; 1978 Mbs = B->rmap->N / bs; 1979 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1980 1981 B->rmap->bs = bs; 1982 b->bs2 = bs * bs; 1983 b->mbs = mbs; 1984 b->Mbs = Mbs; 1985 b->nbs = B->cmap->n / bs; 1986 b->Nbs = B->cmap->N / bs; 1987 1988 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1989 b->rstartbs = B->rmap->rstart / bs; 1990 b->rendbs = B->rmap->rend / bs; 1991 1992 b->cstartbs = B->cmap->rstart / bs; 1993 b->cendbs = B->cmap->rend / bs; 1994 1995 #if defined(PETSC_USE_CTABLE) 1996 PetscCall(PetscHMapIDestroy(&b->colmap)); 1997 #else 1998 PetscCall(PetscFree(b->colmap)); 1999 #endif 2000 PetscCall(PetscFree(b->garray)); 2001 PetscCall(VecDestroy(&b->lvec)); 2002 PetscCall(VecScatterDestroy(&b->Mvctx)); 2003 PetscCall(VecDestroy(&b->slvec0)); 2004 PetscCall(VecDestroy(&b->slvec0b)); 2005 PetscCall(VecDestroy(&b->slvec1)); 2006 PetscCall(VecDestroy(&b->slvec1a)); 2007 PetscCall(VecDestroy(&b->slvec1b)); 2008 PetscCall(VecScatterDestroy(&b->sMvctx)); 2009 2010 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2011 2012 MatSeqXAIJGetOptions_Private(b->B); 2013 PetscCall(MatDestroy(&b->B)); 2014 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2015 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2016 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2017 MatSeqXAIJRestoreOptions_Private(b->B); 2018 2019 MatSeqXAIJGetOptions_Private(b->A); 2020 PetscCall(MatDestroy(&b->A)); 2021 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2022 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2023 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 2024 MatSeqXAIJRestoreOptions_Private(b->A); 2025 2026 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2027 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2028 2029 B->preallocated = PETSC_TRUE; 2030 B->was_assembled = PETSC_FALSE; 2031 B->assembled = PETSC_FALSE; 2032 PetscFunctionReturn(PETSC_SUCCESS); 2033 } 2034 2035 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2036 { 2037 PetscInt m, rstart, cend; 2038 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2039 const PetscInt *JJ = NULL; 2040 PetscScalar *values = NULL; 2041 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2042 PetscBool nooffprocentries; 2043 2044 PetscFunctionBegin; 2045 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2046 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2047 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2048 PetscCall(PetscLayoutSetUp(B->rmap)); 2049 PetscCall(PetscLayoutSetUp(B->cmap)); 2050 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2051 m = B->rmap->n / bs; 2052 rstart = B->rmap->rstart / bs; 2053 cend = B->cmap->rend / bs; 2054 2055 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2056 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2057 for (i = 0; i < m; i++) { 2058 nz = ii[i + 1] - ii[i]; 2059 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2060 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2061 JJ = jj + ii[i]; 2062 bd = 0; 2063 for (j = 0; j < nz; j++) { 2064 if (*JJ >= i + rstart) break; 2065 JJ++; 2066 bd++; 2067 } 2068 d = 0; 2069 for (; j < nz; j++) { 2070 if (*JJ++ >= cend) break; 2071 d++; 2072 } 2073 d_nnz[i] = d; 2074 o_nnz[i] = nz - d - bd; 2075 nz = nz - bd; 2076 nz_max = PetscMax(nz_max, nz); 2077 } 2078 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2079 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2080 PetscCall(PetscFree2(d_nnz, o_nnz)); 2081 2082 values = (PetscScalar *)V; 2083 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2084 for (i = 0; i < m; i++) { 2085 PetscInt row = i + rstart; 2086 PetscInt ncols = ii[i + 1] - ii[i]; 2087 const PetscInt *icols = jj + ii[i]; 2088 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2089 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2090 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2091 } else { /* block ordering does not match so we can only insert one block at a time. */ 2092 PetscInt j; 2093 for (j = 0; j < ncols; j++) { 2094 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2095 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2096 } 2097 } 2098 } 2099 2100 if (!V) PetscCall(PetscFree(values)); 2101 nooffprocentries = B->nooffprocentries; 2102 B->nooffprocentries = PETSC_TRUE; 2103 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2104 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2105 B->nooffprocentries = nooffprocentries; 2106 2107 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2108 PetscFunctionReturn(PETSC_SUCCESS); 2109 } 2110 2111 /*MC 2112 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2113 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2114 the matrix is stored. 2115 2116 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2117 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2118 2119 Options Database Key: 2120 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2121 2122 Level: beginner 2123 2124 Note: 2125 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2126 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2127 2128 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2129 M*/ 2130 2131 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2132 { 2133 Mat_MPISBAIJ *b; 2134 PetscBool flg = PETSC_FALSE; 2135 2136 PetscFunctionBegin; 2137 PetscCall(PetscNew(&b)); 2138 B->data = (void *)b; 2139 B->ops[0] = MatOps_Values; 2140 2141 B->ops->destroy = MatDestroy_MPISBAIJ; 2142 B->ops->view = MatView_MPISBAIJ; 2143 B->assembled = PETSC_FALSE; 2144 B->insertmode = NOT_SET_VALUES; 2145 2146 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2147 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2148 2149 /* build local table of row and column ownerships */ 2150 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2151 2152 /* build cache for off array entries formed */ 2153 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2154 2155 b->donotstash = PETSC_FALSE; 2156 b->colmap = NULL; 2157 b->garray = NULL; 2158 b->roworiented = PETSC_TRUE; 2159 2160 /* stuff used in block assembly */ 2161 b->barray = NULL; 2162 2163 /* stuff used for matrix vector multiply */ 2164 b->lvec = NULL; 2165 b->Mvctx = NULL; 2166 b->slvec0 = NULL; 2167 b->slvec0b = NULL; 2168 b->slvec1 = NULL; 2169 b->slvec1a = NULL; 2170 b->slvec1b = NULL; 2171 b->sMvctx = NULL; 2172 2173 /* stuff for MatGetRow() */ 2174 b->rowindices = NULL; 2175 b->rowvalues = NULL; 2176 b->getrowactive = PETSC_FALSE; 2177 2178 /* hash table stuff */ 2179 b->ht = NULL; 2180 b->hd = NULL; 2181 b->ht_size = 0; 2182 b->ht_flag = PETSC_FALSE; 2183 b->ht_fact = 0; 2184 b->ht_total_ct = 0; 2185 b->ht_insert_ct = 0; 2186 2187 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2188 b->ijonly = PETSC_FALSE; 2189 2190 b->in_loc = NULL; 2191 b->v_loc = NULL; 2192 b->n_loc = 0; 2193 2194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2198 #if defined(PETSC_HAVE_ELEMENTAL) 2199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2200 #endif 2201 #if defined(PETSC_HAVE_SCALAPACK) 2202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2203 #endif 2204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2206 2207 B->symmetric = PETSC_BOOL3_TRUE; 2208 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2209 B->symmetry_eternal = PETSC_TRUE; 2210 B->structural_symmetry_eternal = PETSC_TRUE; 2211 #if defined(PETSC_USE_COMPLEX) 2212 B->hermitian = PETSC_BOOL3_FALSE; 2213 #else 2214 B->hermitian = PETSC_BOOL3_TRUE; 2215 #endif 2216 2217 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2218 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2219 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2220 if (flg) { 2221 PetscReal fact = 1.39; 2222 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2223 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2224 if (fact <= 1.0) fact = 1.39; 2225 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2226 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2227 } 2228 PetscOptionsEnd(); 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } 2231 2232 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2233 /*MC 2234 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2235 2236 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2237 and `MATMPISBAIJ` otherwise. 2238 2239 Options Database Key: 2240 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2241 2242 Level: beginner 2243 2244 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2245 M*/ 2246 2247 /*@C 2248 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2249 the user should preallocate the matrix storage by setting the parameters 2250 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2251 performance can be increased by more than a factor of 50. 2252 2253 Collective 2254 2255 Input Parameters: 2256 + B - the matrix 2257 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2258 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2259 . d_nz - number of block nonzeros per block row in diagonal portion of local 2260 submatrix (same for all local rows) 2261 . d_nnz - array containing the number of block nonzeros in the various block rows 2262 in the upper triangular and diagonal part of the in diagonal portion of the local 2263 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2264 for the diagonal entry and set a value even if it is zero. 2265 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2266 submatrix (same for all local rows). 2267 - o_nnz - array containing the number of nonzeros in the various block rows of the 2268 off-diagonal portion of the local submatrix that is right of the diagonal 2269 (possibly different for each block row) or `NULL`. 2270 2271 Options Database Keys: 2272 + -mat_no_unroll - uses code that does not unroll the loops in the 2273 block calculations (much slower) 2274 - -mat_block_size - size of the blocks to use 2275 2276 Level: intermediate 2277 2278 Notes: 2279 2280 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2281 than it must be used on all processors that share the object for that argument. 2282 2283 If the *_nnz parameter is given then the *_nz parameter is ignored 2284 2285 Storage Information: 2286 For a square global matrix we define each processor's diagonal portion 2287 to be its local rows and the corresponding columns (a square submatrix); 2288 each processor's off-diagonal portion encompasses the remainder of the 2289 local matrix (a rectangular submatrix). 2290 2291 The user can specify preallocated storage for the diagonal part of 2292 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2293 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2294 memory allocation. Likewise, specify preallocated storage for the 2295 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2296 2297 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2298 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2299 You can also run with the option `-info` and look for messages with the string 2300 malloc in them to see if additional memory allocation was needed. 2301 2302 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2303 the figure below we depict these three local rows and all columns (0-11). 2304 2305 .vb 2306 0 1 2 3 4 5 6 7 8 9 10 11 2307 -------------------------- 2308 row 3 |. . . d d d o o o o o o 2309 row 4 |. . . d d d o o o o o o 2310 row 5 |. . . d d d o o o o o o 2311 -------------------------- 2312 .ve 2313 2314 Thus, any entries in the d locations are stored in the d (diagonal) 2315 submatrix, and any entries in the o locations are stored in the 2316 o (off-diagonal) submatrix. Note that the d matrix is stored in 2317 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2318 2319 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2320 plus the diagonal part of the d matrix, 2321 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2322 2323 In general, for PDE problems in which most nonzeros are near the diagonal, 2324 one expects `d_nz` >> `o_nz`. 2325 2326 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2327 @*/ 2328 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2329 { 2330 PetscFunctionBegin; 2331 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2332 PetscValidType(B, 1); 2333 PetscValidLogicalCollectiveInt(B, bs, 2); 2334 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2335 PetscFunctionReturn(PETSC_SUCCESS); 2336 } 2337 2338 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2339 /*@C 2340 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2341 (block compressed row). For good matrix assembly performance 2342 the user should preallocate the matrix storage by setting the parameters 2343 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2344 2345 Collective 2346 2347 Input Parameters: 2348 + comm - MPI communicator 2349 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2350 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2351 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2352 This value should be the same as the local size used in creating the 2353 y vector for the matrix-vector product y = Ax. 2354 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2355 This value should be the same as the local size used in creating the 2356 x vector for the matrix-vector product y = Ax. 2357 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2358 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2359 . d_nz - number of block nonzeros per block row in diagonal portion of local 2360 submatrix (same for all local rows) 2361 . d_nnz - array containing the number of block nonzeros in the various block rows 2362 in the upper triangular portion of the in diagonal portion of the local 2363 (possibly different for each block block row) or `NULL`. 2364 If you plan to factor the matrix you must leave room for the diagonal entry and 2365 set its value even if it is zero. 2366 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2367 submatrix (same for all local rows). 2368 - o_nnz - array containing the number of nonzeros in the various block rows of the 2369 off-diagonal portion of the local submatrix (possibly different for 2370 each block row) or `NULL`. 2371 2372 Output Parameter: 2373 . A - the matrix 2374 2375 Options Database Keys: 2376 + -mat_no_unroll - uses code that does not unroll the loops in the 2377 block calculations (much slower) 2378 . -mat_block_size - size of the blocks to use 2379 - -mat_mpi - use the parallel matrix data structures even on one processor 2380 (defaults to using SeqBAIJ format on one processor) 2381 2382 Level: intermediate 2383 2384 Notes: 2385 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2386 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2387 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2388 2389 The number of rows and columns must be divisible by blocksize. 2390 This matrix type does not support complex Hermitian operation. 2391 2392 The user MUST specify either the local or global matrix dimensions 2393 (possibly both). 2394 2395 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2396 than it must be used on all processors that share the object for that argument. 2397 2398 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by 2399 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2400 2401 If the *_nnz parameter is given then the *_nz parameter is ignored 2402 2403 Storage Information: 2404 For a square global matrix we define each processor's diagonal portion 2405 to be its local rows and the corresponding columns (a square submatrix); 2406 each processor's off-diagonal portion encompasses the remainder of the 2407 local matrix (a rectangular submatrix). 2408 2409 The user can specify preallocated storage for the diagonal part of 2410 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2411 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2412 memory allocation. Likewise, specify preallocated storage for the 2413 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2414 2415 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2416 the figure below we depict these three local rows and all columns (0-11). 2417 2418 .vb 2419 0 1 2 3 4 5 6 7 8 9 10 11 2420 -------------------------- 2421 row 3 |. . . d d d o o o o o o 2422 row 4 |. . . d d d o o o o o o 2423 row 5 |. . . d d d o o o o o o 2424 -------------------------- 2425 .ve 2426 2427 Thus, any entries in the d locations are stored in the d (diagonal) 2428 submatrix, and any entries in the o locations are stored in the 2429 o (off-diagonal) submatrix. Note that the d matrix is stored in 2430 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2431 2432 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2433 plus the diagonal part of the d matrix, 2434 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2435 In general, for PDE problems in which most nonzeros are near the diagonal, 2436 one expects `d_nz` >> `o_nz`. 2437 2438 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2439 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2440 @*/ 2441 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2442 { 2443 PetscMPIInt size; 2444 2445 PetscFunctionBegin; 2446 PetscCall(MatCreate(comm, A)); 2447 PetscCall(MatSetSizes(*A, m, n, M, N)); 2448 PetscCallMPI(MPI_Comm_size(comm, &size)); 2449 if (size > 1) { 2450 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2451 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2452 } else { 2453 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2454 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2455 } 2456 PetscFunctionReturn(PETSC_SUCCESS); 2457 } 2458 2459 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2460 { 2461 Mat mat; 2462 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2463 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2464 PetscScalar *array; 2465 2466 PetscFunctionBegin; 2467 *newmat = NULL; 2468 2469 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2470 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2471 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2472 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2473 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2474 2475 if (matin->hash_active) { 2476 PetscCall(MatSetUp(mat)); 2477 } else { 2478 mat->factortype = matin->factortype; 2479 mat->preallocated = PETSC_TRUE; 2480 mat->assembled = PETSC_TRUE; 2481 mat->insertmode = NOT_SET_VALUES; 2482 2483 a = (Mat_MPISBAIJ *)mat->data; 2484 a->bs2 = oldmat->bs2; 2485 a->mbs = oldmat->mbs; 2486 a->nbs = oldmat->nbs; 2487 a->Mbs = oldmat->Mbs; 2488 a->Nbs = oldmat->Nbs; 2489 2490 a->size = oldmat->size; 2491 a->rank = oldmat->rank; 2492 a->donotstash = oldmat->donotstash; 2493 a->roworiented = oldmat->roworiented; 2494 a->rowindices = NULL; 2495 a->rowvalues = NULL; 2496 a->getrowactive = PETSC_FALSE; 2497 a->barray = NULL; 2498 a->rstartbs = oldmat->rstartbs; 2499 a->rendbs = oldmat->rendbs; 2500 a->cstartbs = oldmat->cstartbs; 2501 a->cendbs = oldmat->cendbs; 2502 2503 /* hash table stuff */ 2504 a->ht = NULL; 2505 a->hd = NULL; 2506 a->ht_size = 0; 2507 a->ht_flag = oldmat->ht_flag; 2508 a->ht_fact = oldmat->ht_fact; 2509 a->ht_total_ct = 0; 2510 a->ht_insert_ct = 0; 2511 2512 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2513 if (oldmat->colmap) { 2514 #if defined(PETSC_USE_CTABLE) 2515 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2516 #else 2517 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2518 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2519 #endif 2520 } else a->colmap = NULL; 2521 2522 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2523 PetscCall(PetscMalloc1(len, &a->garray)); 2524 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2525 } else a->garray = NULL; 2526 2527 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2528 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2529 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2530 2531 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2532 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2533 2534 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2535 PetscCall(VecGetArray(a->slvec1, &array)); 2536 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2537 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2538 PetscCall(VecRestoreArray(a->slvec1, &array)); 2539 PetscCall(VecGetArray(a->slvec0, &array)); 2540 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2541 PetscCall(VecRestoreArray(a->slvec0, &array)); 2542 2543 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2544 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2545 a->sMvctx = oldmat->sMvctx; 2546 2547 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2548 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2549 } 2550 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2551 *newmat = mat; 2552 PetscFunctionReturn(PETSC_SUCCESS); 2553 } 2554 2555 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2556 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2557 2558 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2559 { 2560 PetscBool isbinary; 2561 2562 PetscFunctionBegin; 2563 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2564 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2565 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2566 PetscFunctionReturn(PETSC_SUCCESS); 2567 } 2568 2569 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2570 { 2571 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2572 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2573 PetscReal atmp; 2574 PetscReal *work, *svalues, *rvalues; 2575 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2576 PetscMPIInt rank, size; 2577 PetscInt *rowners_bs, dest, count, source; 2578 PetscScalar *va; 2579 MatScalar *ba; 2580 MPI_Status stat; 2581 2582 PetscFunctionBegin; 2583 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2584 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2585 PetscCall(VecGetArray(v, &va)); 2586 2587 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2588 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2589 2590 bs = A->rmap->bs; 2591 mbs = a->mbs; 2592 Mbs = a->Mbs; 2593 ba = b->a; 2594 bi = b->i; 2595 bj = b->j; 2596 2597 /* find ownerships */ 2598 rowners_bs = A->rmap->range; 2599 2600 /* each proc creates an array to be distributed */ 2601 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2602 2603 /* row_max for B */ 2604 if (rank != size - 1) { 2605 for (i = 0; i < mbs; i++) { 2606 ncols = bi[1] - bi[0]; 2607 bi++; 2608 brow = bs * i; 2609 for (j = 0; j < ncols; j++) { 2610 bcol = bs * (*bj); 2611 for (kcol = 0; kcol < bs; kcol++) { 2612 col = bcol + kcol; /* local col index */ 2613 col += rowners_bs[rank + 1]; /* global col index */ 2614 for (krow = 0; krow < bs; krow++) { 2615 atmp = PetscAbsScalar(*ba); 2616 ba++; 2617 row = brow + krow; /* local row index */ 2618 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2619 if (work[col] < atmp) work[col] = atmp; 2620 } 2621 } 2622 bj++; 2623 } 2624 } 2625 2626 /* send values to its owners */ 2627 for (dest = rank + 1; dest < size; dest++) { 2628 svalues = work + rowners_bs[dest]; 2629 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2630 PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2631 } 2632 } 2633 2634 /* receive values */ 2635 if (rank) { 2636 rvalues = work; 2637 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2638 for (source = 0; source < rank; source++) { 2639 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2640 /* process values */ 2641 for (i = 0; i < count; i++) { 2642 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2643 } 2644 } 2645 } 2646 2647 PetscCall(VecRestoreArray(v, &va)); 2648 PetscCall(PetscFree(work)); 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2653 { 2654 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2655 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2656 PetscScalar *x, *ptr, *from; 2657 Vec bb1; 2658 const PetscScalar *b; 2659 2660 PetscFunctionBegin; 2661 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2662 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2663 2664 if (flag == SOR_APPLY_UPPER) { 2665 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2666 PetscFunctionReturn(PETSC_SUCCESS); 2667 } 2668 2669 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2670 if (flag & SOR_ZERO_INITIAL_GUESS) { 2671 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2672 its--; 2673 } 2674 2675 PetscCall(VecDuplicate(bb, &bb1)); 2676 while (its--) { 2677 /* lower triangular part: slvec0b = - B^T*xx */ 2678 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2679 2680 /* copy xx into slvec0a */ 2681 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2682 PetscCall(VecGetArray(xx, &x)); 2683 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2684 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2685 2686 PetscCall(VecScale(mat->slvec0, -1.0)); 2687 2688 /* copy bb into slvec1a */ 2689 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2690 PetscCall(VecGetArrayRead(bb, &b)); 2691 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2692 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2693 2694 /* set slvec1b = 0 */ 2695 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2696 PetscCall(VecZeroEntries(mat->slvec1b)); 2697 2698 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2699 PetscCall(VecRestoreArray(xx, &x)); 2700 PetscCall(VecRestoreArrayRead(bb, &b)); 2701 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2702 2703 /* upper triangular part: bb1 = bb1 - B*x */ 2704 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2705 2706 /* local diagonal sweep */ 2707 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2708 } 2709 PetscCall(VecDestroy(&bb1)); 2710 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2711 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2712 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2713 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2714 } else if (flag & SOR_EISENSTAT) { 2715 Vec xx1; 2716 PetscBool hasop; 2717 const PetscScalar *diag; 2718 PetscScalar *sl, scale = (omega - 2.0) / omega; 2719 PetscInt i, n; 2720 2721 if (!mat->xx1) { 2722 PetscCall(VecDuplicate(bb, &mat->xx1)); 2723 PetscCall(VecDuplicate(bb, &mat->bb1)); 2724 } 2725 xx1 = mat->xx1; 2726 bb1 = mat->bb1; 2727 2728 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2729 2730 if (!mat->diag) { 2731 /* this is wrong for same matrix with new nonzero values */ 2732 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2733 PetscCall(MatGetDiagonal(matin, mat->diag)); 2734 } 2735 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2736 2737 if (hasop) { 2738 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2739 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2740 } else { 2741 /* 2742 These two lines are replaced by code that may be a bit faster for a good compiler 2743 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2744 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2745 */ 2746 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2747 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2748 PetscCall(VecGetArrayRead(bb, &b)); 2749 PetscCall(VecGetArray(xx, &x)); 2750 PetscCall(VecGetLocalSize(xx, &n)); 2751 if (omega == 1.0) { 2752 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2753 PetscCall(PetscLogFlops(2.0 * n)); 2754 } else { 2755 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2756 PetscCall(PetscLogFlops(3.0 * n)); 2757 } 2758 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2759 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2760 PetscCall(VecRestoreArrayRead(bb, &b)); 2761 PetscCall(VecRestoreArray(xx, &x)); 2762 } 2763 2764 /* multiply off-diagonal portion of matrix */ 2765 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2766 PetscCall(VecZeroEntries(mat->slvec1b)); 2767 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2768 PetscCall(VecGetArray(mat->slvec0, &from)); 2769 PetscCall(VecGetArray(xx, &x)); 2770 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2771 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2772 PetscCall(VecRestoreArray(xx, &x)); 2773 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2774 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2775 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2776 2777 /* local sweep */ 2778 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2779 PetscCall(VecAXPY(xx, 1.0, xx1)); 2780 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2781 PetscFunctionReturn(PETSC_SUCCESS); 2782 } 2783 2784 /*@ 2785 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2786 2787 Collective 2788 2789 Input Parameters: 2790 + comm - MPI communicator 2791 . bs - the block size, only a block size of 1 is supported 2792 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2793 . n - This value should be the same as the local size used in creating the 2794 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2795 calculated if `N` is given) For square matrices `n` is almost always `m`. 2796 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2797 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2798 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2799 . j - column indices 2800 - a - matrix values 2801 2802 Output Parameter: 2803 . mat - the matrix 2804 2805 Level: intermediate 2806 2807 Notes: 2808 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2809 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2810 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2811 2812 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2813 2814 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2815 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2816 @*/ 2817 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2818 { 2819 PetscFunctionBegin; 2820 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2821 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2822 PetscCall(MatCreate(comm, mat)); 2823 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2824 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2825 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2826 PetscFunctionReturn(PETSC_SUCCESS); 2827 } 2828 2829 /*@C 2830 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2831 2832 Collective 2833 2834 Input Parameters: 2835 + B - the matrix 2836 . bs - the block size 2837 . i - the indices into `j` for the start of each local row (indices start with zero) 2838 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2839 - v - optional values in the matrix, pass `NULL` if not provided 2840 2841 Level: advanced 2842 2843 Notes: 2844 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2845 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2846 called this routine. 2847 2848 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2849 and usually the numerical values as well 2850 2851 Any entries passed in that are below the diagonal are ignored 2852 2853 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2854 `MatCreateMPISBAIJWithArrays()` 2855 @*/ 2856 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2857 { 2858 PetscFunctionBegin; 2859 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2860 PetscFunctionReturn(PETSC_SUCCESS); 2861 } 2862 2863 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2864 { 2865 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2866 PetscInt *indx; 2867 PetscScalar *values; 2868 2869 PetscFunctionBegin; 2870 PetscCall(MatGetSize(inmat, &m, &N)); 2871 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2872 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2873 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2874 PetscInt *bindx, rmax = a->rmax, j; 2875 PetscMPIInt rank, size; 2876 2877 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2878 mbs = m / bs; 2879 Nbs = N / cbs; 2880 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2881 nbs = n / cbs; 2882 2883 PetscCall(PetscMalloc1(rmax, &bindx)); 2884 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2885 2886 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2887 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2888 if (rank == size - 1) { 2889 /* Check sum(nbs) = Nbs */ 2890 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2891 } 2892 2893 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2894 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2895 for (i = 0; i < mbs; i++) { 2896 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2897 nnz = nnz / bs; 2898 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2899 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2900 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2901 } 2902 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2903 PetscCall(PetscFree(bindx)); 2904 2905 PetscCall(MatCreate(comm, outmat)); 2906 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2907 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2908 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2909 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2910 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2911 MatPreallocateEnd(dnz, onz); 2912 } 2913 2914 /* numeric phase */ 2915 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2916 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2917 2918 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2919 for (i = 0; i < m; i++) { 2920 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2921 Ii = i + rstart; 2922 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2923 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2924 } 2925 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2926 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2927 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2928 PetscFunctionReturn(PETSC_SUCCESS); 2929 } 2930