1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 PetscCall(MatRestoreRowUpperTriangular(A)); 141 } else { 142 PetscCall(MatSetUp(B)); 143 } 144 } else { 145 B = *newmat; 146 PetscCall(MatZeroEntries(B)); 147 } 148 149 PetscCall(MatGetRowUpperTriangular(A)); 150 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 151 PetscInt ncols; 152 const PetscInt *row; 153 const PetscScalar *vals; 154 155 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 156 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 157 #if defined(PETSC_USE_COMPLEX) 158 if (A->hermitian == PETSC_BOOL3_TRUE) { 159 PetscInt i; 160 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 161 } else { 162 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 163 } 164 #else 165 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 166 #endif 167 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 168 } 169 PetscCall(MatRestoreRowUpperTriangular(A)); 170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 172 173 if (reuse == MAT_INPLACE_MATRIX) { 174 PetscCall(MatHeaderReplace(A, &B)); 175 } else { 176 *newmat = B; 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 182 { 183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 184 185 PetscFunctionBegin; 186 PetscCall(MatStoreValues(aij->A)); 187 PetscCall(MatStoreValues(aij->B)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 192 { 193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 194 195 PetscFunctionBegin; 196 PetscCall(MatRetrieveValues(aij->A)); 197 PetscCall(MatRetrieveValues(aij->B)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 202 do { \ 203 brow = row / bs; \ 204 rp = aj + ai[brow]; \ 205 ap = aa + bs2 * ai[brow]; \ 206 rmax = aimax[brow]; \ 207 nrow = ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 a_noinsert:; \ 238 ailen[brow] = nrow; \ 239 } while (0) 240 241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 242 do { \ 243 brow = row / bs; \ 244 rp = bj + bi[brow]; \ 245 ap = ba + bs2 * bi[brow]; \ 246 rmax = bimax[brow]; \ 247 nrow = bilen[brow]; \ 248 bcol = col / bs; \ 249 ridx = row % bs; \ 250 cidx = col % bs; \ 251 low = 0; \ 252 high = nrow; \ 253 while (high - low > 3) { \ 254 t = (low + high) / 2; \ 255 if (rp[t] > bcol) high = t; \ 256 else low = t; \ 257 } \ 258 for (_i = low; _i < high; _i++) { \ 259 if (rp[_i] > bcol) break; \ 260 if (rp[_i] == bcol) { \ 261 bap = ap + bs2 * _i + bs * cidx + ridx; \ 262 if (addv == ADD_VALUES) *bap += value; \ 263 else *bap = value; \ 264 goto b_noinsert; \ 265 } \ 266 } \ 267 if (b->nonew == 1) goto b_noinsert; \ 268 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 269 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 270 N = nrow++ - 1; \ 271 /* shift up all the later entries in this row */ \ 272 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 273 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 274 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 275 rp[_i] = bcol; \ 276 ap[bs2 * _i + bs * cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } while (0) 280 281 /* Only add/insert a(i,j) with i<=j (blocks). 282 Any a(i,j) with i>j input by user is ignored or generates an error 283 */ 284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 285 { 286 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 287 MatScalar value; 288 PetscBool roworiented = baij->roworiented; 289 PetscInt i, j, row, col; 290 PetscInt rstart_orig = mat->rmap->rstart; 291 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 292 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 293 294 /* Some Variables required in the macro */ 295 Mat A = baij->A; 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 297 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 298 MatScalar *aa = a->a; 299 300 Mat B = baij->B; 301 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 302 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 303 MatScalar *ba = b->a; 304 305 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 306 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 307 MatScalar *ap, *bap; 308 309 /* for stash */ 310 PetscInt n_loc, *in_loc = NULL; 311 MatScalar *v_loc = NULL; 312 313 PetscFunctionBegin; 314 if (!baij->donotstash) { 315 if (n > baij->n_loc) { 316 PetscCall(PetscFree(baij->in_loc)); 317 PetscCall(PetscFree(baij->v_loc)); 318 PetscCall(PetscMalloc1(n, &baij->in_loc)); 319 PetscCall(PetscMalloc1(n, &baij->v_loc)); 320 321 baij->n_loc = n; 322 } 323 in_loc = baij->in_loc; 324 v_loc = baij->v_loc; 325 } 326 327 for (i = 0; i < m; i++) { 328 if (im[i] < 0) continue; 329 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 330 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 331 row = im[i] - rstart_orig; /* local row index */ 332 for (j = 0; j < n; j++) { 333 if (im[i] / bs > in[j] / bs) { 334 PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 335 continue; /* ignore lower triangular blocks */ 336 } 337 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 338 col = in[j] - cstart_orig; /* local col index */ 339 brow = row / bs; 340 bcol = col / bs; 341 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 342 if (roworiented) value = v[i * n + j]; 343 else value = v[i + j * m]; 344 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 345 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 346 } else if (in[j] < 0) { 347 continue; 348 } else { 349 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 350 /* off-diag entry (B) */ 351 if (mat->was_assembled) { 352 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 353 #if defined(PETSC_USE_CTABLE) 354 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 355 col = col - 1; 356 #else 357 col = baij->colmap[in[j] / bs] - 1; 358 #endif 359 if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) { 360 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 361 col = in[j]; 362 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 363 B = baij->B; 364 b = (Mat_SeqBAIJ *)B->data; 365 bimax = b->imax; 366 bi = b->i; 367 bilen = b->ilen; 368 bj = b->j; 369 ba = b->a; 370 } else col += in[j] % bs; 371 } else col = in[j]; 372 if (roworiented) value = v[i * n + j]; 373 else value = v[i + j * m]; 374 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 375 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 376 } 377 } 378 } else { /* off processor entry */ 379 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 380 if (!baij->donotstash) { 381 mat->assembled = PETSC_FALSE; 382 n_loc = 0; 383 for (j = 0; j < n; j++) { 384 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 385 in_loc[n_loc] = in[j]; 386 if (roworiented) { 387 v_loc[n_loc] = v[i * n + j]; 388 } else { 389 v_loc[n_loc] = v[j * m + i]; 390 } 391 n_loc++; 392 } 393 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 394 } 395 } 396 } 397 PetscFunctionReturn(PETSC_SUCCESS); 398 } 399 400 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 401 { 402 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 403 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 404 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 405 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 406 PetscBool roworiented = a->roworiented; 407 const PetscScalar *value = v; 408 MatScalar *ap, *aa = a->a, *bap; 409 410 PetscFunctionBegin; 411 if (col < row) { 412 PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 413 PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 414 } 415 rp = aj + ai[row]; 416 ap = aa + bs2 * ai[row]; 417 rmax = imax[row]; 418 nrow = ailen[row]; 419 value = v; 420 low = 0; 421 high = nrow; 422 423 while (high - low > 7) { 424 t = (low + high) / 2; 425 if (rp[t] > col) high = t; 426 else low = t; 427 } 428 for (i = low; i < high; i++) { 429 if (rp[i] > col) break; 430 if (rp[i] == col) { 431 bap = ap + bs2 * i; 432 if (roworiented) { 433 if (is == ADD_VALUES) { 434 for (ii = 0; ii < bs; ii++) { 435 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 436 } 437 } else { 438 for (ii = 0; ii < bs; ii++) { 439 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 440 } 441 } 442 } else { 443 if (is == ADD_VALUES) { 444 for (ii = 0; ii < bs; ii++) { 445 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 446 } 447 } else { 448 for (ii = 0; ii < bs; ii++) { 449 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 450 } 451 } 452 } 453 goto noinsert2; 454 } 455 } 456 if (nonew == 1) goto noinsert2; 457 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 458 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 459 N = nrow++ - 1; 460 high++; 461 /* shift up all the later entries in this row */ 462 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 463 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 464 rp[i] = col; 465 bap = ap + bs2 * i; 466 if (roworiented) { 467 for (ii = 0; ii < bs; ii++) { 468 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 469 } 470 } else { 471 for (ii = 0; ii < bs; ii++) { 472 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 473 } 474 } 475 noinsert2:; 476 ailen[row] = nrow; 477 PetscFunctionReturn(PETSC_SUCCESS); 478 } 479 480 /* 481 This routine is exactly duplicated in mpibaij.c 482 */ 483 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 484 { 485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 486 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 487 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 488 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 489 PetscBool roworiented = a->roworiented; 490 const PetscScalar *value = v; 491 MatScalar *ap, *aa = a->a, *bap; 492 493 PetscFunctionBegin; 494 rp = aj + ai[row]; 495 ap = aa + bs2 * ai[row]; 496 rmax = imax[row]; 497 nrow = ailen[row]; 498 low = 0; 499 high = nrow; 500 value = v; 501 while (high - low > 7) { 502 t = (low + high) / 2; 503 if (rp[t] > col) high = t; 504 else low = t; 505 } 506 for (i = low; i < high; i++) { 507 if (rp[i] > col) break; 508 if (rp[i] == col) { 509 bap = ap + bs2 * i; 510 if (roworiented) { 511 if (is == ADD_VALUES) { 512 for (ii = 0; ii < bs; ii++) { 513 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 514 } 515 } else { 516 for (ii = 0; ii < bs; ii++) { 517 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 518 } 519 } 520 } else { 521 if (is == ADD_VALUES) { 522 for (ii = 0; ii < bs; ii++, value += bs) { 523 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 524 bap += bs; 525 } 526 } else { 527 for (ii = 0; ii < bs; ii++, value += bs) { 528 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 529 bap += bs; 530 } 531 } 532 } 533 goto noinsert2; 534 } 535 } 536 if (nonew == 1) goto noinsert2; 537 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 538 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 539 N = nrow++ - 1; 540 high++; 541 /* shift up all the later entries in this row */ 542 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 543 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 544 rp[i] = col; 545 bap = ap + bs2 * i; 546 if (roworiented) { 547 for (ii = 0; ii < bs; ii++) { 548 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 549 } 550 } else { 551 for (ii = 0; ii < bs; ii++) { 552 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 553 } 554 } 555 noinsert2:; 556 ailen[row] = nrow; 557 PetscFunctionReturn(PETSC_SUCCESS); 558 } 559 560 /* 561 This routine could be optimized by removing the need for the block copy below and passing stride information 562 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 563 */ 564 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 565 { 566 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 567 const MatScalar *value; 568 MatScalar *barray = baij->barray; 569 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 570 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 571 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 572 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 573 574 PetscFunctionBegin; 575 if (!barray) { 576 PetscCall(PetscMalloc1(bs2, &barray)); 577 baij->barray = barray; 578 } 579 580 if (roworiented) { 581 stepval = (n - 1) * bs; 582 } else { 583 stepval = (m - 1) * bs; 584 } 585 for (i = 0; i < m; i++) { 586 if (im[i] < 0) continue; 587 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 588 if (im[i] >= rstart && im[i] < rend) { 589 row = im[i] - rstart; 590 for (j = 0; j < n; j++) { 591 if (im[i] > in[j]) { 592 PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 593 continue; /* ignore lower triangular blocks */ 594 } 595 /* If NumCol = 1 then a copy is not required */ 596 if (roworiented && n == 1) { 597 barray = (MatScalar *)v + i * bs2; 598 } else if ((!roworiented) && (m == 1)) { 599 barray = (MatScalar *)v + j * bs2; 600 } else { /* Here a copy is required */ 601 if (roworiented) { 602 value = v + i * (stepval + bs) * bs + j * bs; 603 } else { 604 value = v + j * (stepval + bs) * bs + i * bs; 605 } 606 for (ii = 0; ii < bs; ii++, value += stepval) { 607 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 608 } 609 barray -= bs2; 610 } 611 612 if (in[j] >= cstart && in[j] < cend) { 613 col = in[j] - cstart; 614 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 615 } else if (in[j] < 0) { 616 continue; 617 } else { 618 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 619 if (mat->was_assembled) { 620 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 621 622 #if defined(PETSC_USE_CTABLE) 623 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 624 col = col < 1 ? -1 : (col - 1) / bs; 625 #else 626 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 627 #endif 628 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 629 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 630 col = in[j]; 631 } 632 } else col = in[j]; 633 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 634 } 635 } 636 } else { 637 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 638 if (!baij->donotstash) { 639 if (roworiented) { 640 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } else { 642 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 643 } 644 } 645 } 646 } 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 651 { 652 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 653 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 654 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 655 656 PetscFunctionBegin; 657 for (i = 0; i < m; i++) { 658 if (idxm[i] < 0) continue; /* negative row */ 659 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 660 PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 661 row = idxm[i] - bsrstart; 662 for (j = 0; j < n; j++) { 663 if (idxn[j] < 0) continue; /* negative column */ 664 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 665 if (idxn[j] >= bscstart && idxn[j] < bscend) { 666 col = idxn[j] - bscstart; 667 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 668 } else { 669 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 670 #if defined(PETSC_USE_CTABLE) 671 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 672 data--; 673 #else 674 data = baij->colmap[idxn[j] / bs] - 1; 675 #endif 676 if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0; 677 else { 678 col = data + idxn[j] % bs; 679 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 680 } 681 } 682 } 683 } 684 PetscFunctionReturn(PETSC_SUCCESS); 685 } 686 687 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 688 { 689 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 690 PetscReal sum[2], *lnorm2; 691 692 PetscFunctionBegin; 693 if (baij->size == 1) { 694 PetscCall(MatNorm(baij->A, type, norm)); 695 } else { 696 if (type == NORM_FROBENIUS) { 697 PetscCall(PetscMalloc1(2, &lnorm2)); 698 PetscCall(MatNorm(baij->A, type, lnorm2)); 699 *lnorm2 = (*lnorm2) * (*lnorm2); 700 lnorm2++; /* squar power of norm(A) */ 701 PetscCall(MatNorm(baij->B, type, lnorm2)); 702 *lnorm2 = (*lnorm2) * (*lnorm2); 703 lnorm2--; /* squar power of norm(B) */ 704 PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 705 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 706 PetscCall(PetscFree(lnorm2)); 707 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 708 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 709 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 710 PetscReal *rsum, vabs; 711 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 712 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 713 MatScalar *v; 714 715 PetscCall(PetscMalloc1(mat->cmap->N, &rsum)); 716 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 717 /* Amat */ 718 v = amat->a; 719 jj = amat->j; 720 for (brow = 0; brow < mbs; brow++) { 721 grow = bs * (rstart + brow); 722 nz = amat->i[brow + 1] - amat->i[brow]; 723 for (bcol = 0; bcol < nz; bcol++) { 724 gcol = bs * (rstart + *jj); 725 jj++; 726 for (col = 0; col < bs; col++) { 727 for (row = 0; row < bs; row++) { 728 vabs = PetscAbsScalar(*v); 729 v++; 730 rsum[gcol + col] += vabs; 731 /* non-diagonal block */ 732 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 733 } 734 } 735 } 736 PetscCall(PetscLogFlops(nz * bs * bs)); 737 } 738 /* Bmat */ 739 v = bmat->a; 740 jj = bmat->j; 741 for (brow = 0; brow < mbs; brow++) { 742 grow = bs * (rstart + brow); 743 nz = bmat->i[brow + 1] - bmat->i[brow]; 744 for (bcol = 0; bcol < nz; bcol++) { 745 gcol = bs * garray[*jj]; 746 jj++; 747 for (col = 0; col < bs; col++) { 748 for (row = 0; row < bs; row++) { 749 vabs = PetscAbsScalar(*v); 750 v++; 751 rsum[gcol + col] += vabs; 752 rsum[grow + row] += vabs; 753 } 754 } 755 } 756 PetscCall(PetscLogFlops(nz * bs * bs)); 757 } 758 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 759 *norm = 0.0; 760 for (col = 0; col < mat->cmap->N; col++) { 761 if (rsum[col] > *norm) *norm = rsum[col]; 762 } 763 PetscCall(PetscFree(rsum)); 764 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 765 } 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 770 { 771 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 772 PetscInt nstash, reallocs; 773 774 PetscFunctionBegin; 775 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 776 777 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 778 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 779 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 780 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 781 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 782 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 783 PetscFunctionReturn(PETSC_SUCCESS); 784 } 785 786 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 787 { 788 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 789 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 790 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 791 PetscInt *row, *col; 792 PetscBool all_assembled; 793 PetscMPIInt n; 794 PetscBool r1, r2, r3; 795 MatScalar *val; 796 797 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 798 PetscFunctionBegin; 799 if (!baij->donotstash && !mat->nooffprocentries) { 800 while (1) { 801 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 802 if (!flg) break; 803 804 for (i = 0; i < n;) { 805 /* Now identify the consecutive vals belonging to the same row */ 806 for (j = i, rstart = row[j]; j < n; j++) { 807 if (row[j] != rstart) break; 808 } 809 if (j < n) ncols = j - i; 810 else ncols = n - i; 811 /* Now assemble all these values with a single function call */ 812 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 813 i = j; 814 } 815 } 816 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 817 /* Now process the block-stash. Since the values are stashed column-oriented, 818 set the row-oriented flag to column-oriented, and after MatSetValues() 819 restore the original flags */ 820 r1 = baij->roworiented; 821 r2 = a->roworiented; 822 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 823 824 baij->roworiented = PETSC_FALSE; 825 a->roworiented = PETSC_FALSE; 826 827 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */ 828 while (1) { 829 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 830 if (!flg) break; 831 832 for (i = 0; i < n;) { 833 /* Now identify the consecutive vals belonging to the same row */ 834 for (j = i, rstart = row[j]; j < n; j++) { 835 if (row[j] != rstart) break; 836 } 837 if (j < n) ncols = j - i; 838 else ncols = n - i; 839 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 840 i = j; 841 } 842 } 843 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 844 845 baij->roworiented = r1; 846 a->roworiented = r2; 847 848 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */ 849 } 850 851 PetscCall(MatAssemblyBegin(baij->A, mode)); 852 PetscCall(MatAssemblyEnd(baij->A, mode)); 853 854 /* determine if any process has disassembled, if so we must 855 also disassemble ourselves, in order that we may reassemble. */ 856 /* 857 if nonzero structure of submatrix B cannot change then we know that 858 no process disassembled thus we can skip this stuff 859 */ 860 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 861 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 862 if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 863 } 864 865 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ 866 PetscCall(MatAssemblyBegin(baij->B, mode)); 867 PetscCall(MatAssemblyEnd(baij->B, mode)); 868 869 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 870 871 baij->rowvalues = NULL; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 875 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 876 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 877 } 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 882 #include <petscdraw.h> 883 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 884 { 885 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 886 PetscInt bs = mat->rmap->bs; 887 PetscMPIInt rank = baij->rank; 888 PetscBool isascii, isdraw; 889 PetscViewer sviewer; 890 PetscViewerFormat format; 891 892 PetscFunctionBegin; 893 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 894 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 895 if (isascii) { 896 PetscCall(PetscViewerGetFormat(viewer, &format)); 897 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 898 MatInfo info; 899 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 900 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 901 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 902 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 903 mat->rmap->bs, info.memory)); 904 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 905 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 906 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 907 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 908 PetscCall(PetscViewerFlush(viewer)); 909 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 910 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 911 PetscCall(VecScatterView(baij->Mvctx, viewer)); 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } else if (format == PETSC_VIEWER_ASCII_INFO) { 914 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 915 PetscFunctionReturn(PETSC_SUCCESS); 916 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 917 PetscFunctionReturn(PETSC_SUCCESS); 918 } 919 } 920 921 if (isdraw) { 922 PetscDraw draw; 923 PetscBool isnull; 924 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 925 PetscCall(PetscDrawIsNull(draw, &isnull)); 926 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 927 } 928 929 { 930 /* assemble the entire matrix onto first processor. */ 931 Mat A; 932 Mat_SeqSBAIJ *Aloc; 933 Mat_SeqBAIJ *Bloc; 934 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 935 MatScalar *a; 936 const char *matname; 937 938 /* Should this be the same type as mat? */ 939 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 940 if (rank == 0) { 941 PetscCall(MatSetSizes(A, M, N, M, N)); 942 } else { 943 PetscCall(MatSetSizes(A, 0, 0, M, N)); 944 } 945 PetscCall(MatSetType(A, MATMPISBAIJ)); 946 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 947 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 948 949 /* copy over the A part */ 950 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 951 ai = Aloc->i; 952 aj = Aloc->j; 953 a = Aloc->a; 954 PetscCall(PetscMalloc1(bs, &rvals)); 955 956 for (i = 0; i < mbs; i++) { 957 rvals[0] = bs * (baij->rstartbs + i); 958 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 959 for (j = ai[i]; j < ai[i + 1]; j++) { 960 col = (baij->cstartbs + aj[j]) * bs; 961 for (k = 0; k < bs; k++) { 962 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 963 col++; 964 a += bs; 965 } 966 } 967 } 968 /* copy over the B part */ 969 Bloc = (Mat_SeqBAIJ *)baij->B->data; 970 ai = Bloc->i; 971 aj = Bloc->j; 972 a = Bloc->a; 973 for (i = 0; i < mbs; i++) { 974 rvals[0] = bs * (baij->rstartbs + i); 975 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 976 for (j = ai[i]; j < ai[i + 1]; j++) { 977 col = baij->garray[aj[j]] * bs; 978 for (k = 0; k < bs; k++) { 979 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 980 col++; 981 a += bs; 982 } 983 } 984 } 985 PetscCall(PetscFree(rvals)); 986 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 987 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 988 /* 989 Everyone has to call to draw the matrix since the graphics waits are 990 synchronized across all processors that share the PetscDraw object 991 */ 992 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 993 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 994 if (rank == 0) { 995 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname)); 996 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer)); 997 } 998 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 999 PetscCall(MatDestroy(&A)); 1000 } 1001 PetscFunctionReturn(PETSC_SUCCESS); 1002 } 1003 1004 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1005 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1006 1007 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1008 { 1009 PetscBool isascii, isdraw, issocket, isbinary; 1010 1011 PetscFunctionBegin; 1012 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1013 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1014 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1015 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1016 if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 1017 else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 1018 PetscFunctionReturn(PETSC_SUCCESS); 1019 } 1020 1021 #if defined(PETSC_USE_COMPLEX) 1022 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1023 { 1024 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1025 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1026 PetscScalar *from; 1027 const PetscScalar *x; 1028 1029 PetscFunctionBegin; 1030 /* diagonal part */ 1031 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1032 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1033 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1034 PetscCall(VecZeroEntries(a->slvec1b)); 1035 1036 /* subdiagonal part */ 1037 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1038 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1039 1040 /* copy x into the vec slvec0 */ 1041 PetscCall(VecGetArray(a->slvec0, &from)); 1042 PetscCall(VecGetArrayRead(xx, &x)); 1043 1044 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1045 PetscCall(VecRestoreArray(a->slvec0, &from)); 1046 PetscCall(VecRestoreArrayRead(xx, &x)); 1047 1048 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1049 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1050 /* supperdiagonal part */ 1051 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 #endif 1055 1056 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1057 { 1058 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1059 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1060 PetscScalar *from; 1061 const PetscScalar *x; 1062 1063 PetscFunctionBegin; 1064 /* diagonal part */ 1065 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1066 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1067 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1068 PetscCall(VecZeroEntries(a->slvec1b)); 1069 1070 /* subdiagonal part */ 1071 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1072 1073 /* copy x into the vec slvec0 */ 1074 PetscCall(VecGetArray(a->slvec0, &from)); 1075 PetscCall(VecGetArrayRead(xx, &x)); 1076 1077 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1078 PetscCall(VecRestoreArray(a->slvec0, &from)); 1079 PetscCall(VecRestoreArrayRead(xx, &x)); 1080 1081 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1082 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1083 /* supperdiagonal part */ 1084 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1085 PetscFunctionReturn(PETSC_SUCCESS); 1086 } 1087 1088 #if PetscDefined(USE_COMPLEX) 1089 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1090 { 1091 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1092 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1093 PetscScalar *from; 1094 const PetscScalar *x; 1095 1096 PetscFunctionBegin; 1097 /* diagonal part */ 1098 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1099 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1100 PetscCall(VecZeroEntries(a->slvec1b)); 1101 1102 /* subdiagonal part */ 1103 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1104 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1105 1106 /* copy x into the vec slvec0 */ 1107 PetscCall(VecGetArray(a->slvec0, &from)); 1108 PetscCall(VecGetArrayRead(xx, &x)); 1109 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1110 PetscCall(VecRestoreArray(a->slvec0, &from)); 1111 1112 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1113 PetscCall(VecRestoreArrayRead(xx, &x)); 1114 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1115 1116 /* supperdiagonal part */ 1117 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1118 PetscFunctionReturn(PETSC_SUCCESS); 1119 } 1120 #endif 1121 1122 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1123 { 1124 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1125 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1126 PetscScalar *from; 1127 const PetscScalar *x; 1128 1129 PetscFunctionBegin; 1130 /* diagonal part */ 1131 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1132 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1133 PetscCall(VecZeroEntries(a->slvec1b)); 1134 1135 /* subdiagonal part */ 1136 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1137 1138 /* copy x into the vec slvec0 */ 1139 PetscCall(VecGetArray(a->slvec0, &from)); 1140 PetscCall(VecGetArrayRead(xx, &x)); 1141 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1142 PetscCall(VecRestoreArray(a->slvec0, &from)); 1143 1144 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1145 PetscCall(VecRestoreArrayRead(xx, &x)); 1146 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1147 1148 /* supperdiagonal part */ 1149 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 /* 1154 This only works correctly for square matrices where the subblock A->A is the 1155 diagonal block 1156 */ 1157 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1158 { 1159 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1160 1161 PetscFunctionBegin; 1162 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1163 PetscCall(MatGetDiagonal(a->A, v)); 1164 PetscFunctionReturn(PETSC_SUCCESS); 1165 } 1166 1167 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1168 { 1169 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1170 1171 PetscFunctionBegin; 1172 PetscCall(MatScale(a->A, aa)); 1173 PetscCall(MatScale(a->B, aa)); 1174 PetscFunctionReturn(PETSC_SUCCESS); 1175 } 1176 1177 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1178 { 1179 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1180 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1181 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1182 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1183 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1184 1185 PetscFunctionBegin; 1186 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1187 mat->getrowactive = PETSC_TRUE; 1188 1189 if (!mat->rowvalues && (idx || v)) { 1190 /* 1191 allocate enough space to hold information from the longest row. 1192 */ 1193 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1194 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1195 PetscInt max = 1, mbs = mat->mbs, tmp; 1196 for (i = 0; i < mbs; i++) { 1197 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1198 if (max < tmp) max = tmp; 1199 } 1200 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1201 } 1202 1203 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1204 lrow = row - brstart; /* local row index */ 1205 1206 pvA = &vworkA; 1207 pcA = &cworkA; 1208 pvB = &vworkB; 1209 pcB = &cworkB; 1210 if (!v) { 1211 pvA = NULL; 1212 pvB = NULL; 1213 } 1214 if (!idx) { 1215 pcA = NULL; 1216 if (!v) pcB = NULL; 1217 } 1218 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1219 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1220 nztot = nzA + nzB; 1221 1222 cmap = mat->garray; 1223 if (v || idx) { 1224 if (nztot) { 1225 /* Sort by increasing column numbers, assuming A and B already sorted */ 1226 PetscInt imark = -1; 1227 if (v) { 1228 *v = v_p = mat->rowvalues; 1229 for (i = 0; i < nzB; i++) { 1230 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1231 else break; 1232 } 1233 imark = i; 1234 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1235 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1236 } 1237 if (idx) { 1238 *idx = idx_p = mat->rowindices; 1239 if (imark > -1) { 1240 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1241 } else { 1242 for (i = 0; i < nzB; i++) { 1243 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1244 else break; 1245 } 1246 imark = i; 1247 } 1248 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1249 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1250 } 1251 } else { 1252 if (idx) *idx = NULL; 1253 if (v) *v = NULL; 1254 } 1255 } 1256 *nz = nztot; 1257 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1258 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1259 PetscFunctionReturn(PETSC_SUCCESS); 1260 } 1261 1262 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1263 { 1264 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1265 1266 PetscFunctionBegin; 1267 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1268 baij->getrowactive = PETSC_FALSE; 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 1272 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1273 { 1274 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1275 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1276 1277 PetscFunctionBegin; 1278 aA->getrow_utriangular = PETSC_TRUE; 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1282 { 1283 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1284 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1285 1286 PetscFunctionBegin; 1287 aA->getrow_utriangular = PETSC_FALSE; 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 1291 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1292 { 1293 PetscFunctionBegin; 1294 if (PetscDefined(USE_COMPLEX)) { 1295 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1296 1297 PetscCall(MatConjugate(a->A)); 1298 PetscCall(MatConjugate(a->B)); 1299 } 1300 PetscFunctionReturn(PETSC_SUCCESS); 1301 } 1302 1303 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1304 { 1305 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1306 1307 PetscFunctionBegin; 1308 PetscCall(MatRealPart(a->A)); 1309 PetscCall(MatRealPart(a->B)); 1310 PetscFunctionReturn(PETSC_SUCCESS); 1311 } 1312 1313 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1314 { 1315 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1316 1317 PetscFunctionBegin; 1318 PetscCall(MatImaginaryPart(a->A)); 1319 PetscCall(MatImaginaryPart(a->B)); 1320 PetscFunctionReturn(PETSC_SUCCESS); 1321 } 1322 1323 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1324 Input: isrow - distributed(parallel), 1325 iscol_local - locally owned (seq) 1326 */ 1327 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1328 { 1329 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1330 const PetscInt *ptr1, *ptr2; 1331 1332 PetscFunctionBegin; 1333 *flg = PETSC_FALSE; 1334 PetscCall(ISGetLocalSize(isrow, &sz1)); 1335 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1336 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); 1337 1338 PetscCall(ISGetIndices(isrow, &ptr1)); 1339 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1340 1341 PetscCall(PetscMalloc1(sz1, &a1)); 1342 PetscCall(PetscMalloc1(sz2, &a2)); 1343 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1344 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1345 PetscCall(PetscSortInt(sz1, a1)); 1346 PetscCall(PetscSortInt(sz2, a2)); 1347 1348 nmatch = 0; 1349 k = 0; 1350 for (i = 0; i < sz1; i++) { 1351 for (j = k; j < sz2; j++) { 1352 if (a1[i] == a2[j]) { 1353 k = j; 1354 nmatch++; 1355 break; 1356 } 1357 } 1358 } 1359 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1360 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1361 PetscCall(PetscFree(a1)); 1362 PetscCall(PetscFree(a2)); 1363 if (nmatch < sz1) { 1364 *flg = PETSC_FALSE; 1365 } else { 1366 *flg = PETSC_TRUE; 1367 } 1368 PetscFunctionReturn(PETSC_SUCCESS); 1369 } 1370 1371 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1372 { 1373 Mat C[2]; 1374 IS iscol_local, isrow_local; 1375 PetscInt csize, csize_local, rsize; 1376 PetscBool isequal, issorted, isidentity = PETSC_FALSE; 1377 1378 PetscFunctionBegin; 1379 PetscCall(ISGetLocalSize(iscol, &csize)); 1380 PetscCall(ISGetLocalSize(isrow, &rsize)); 1381 if (call == MAT_REUSE_MATRIX) { 1382 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1383 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1384 } else { 1385 PetscCall(ISAllGather(iscol, &iscol_local)); 1386 PetscCall(ISSorted(iscol_local, &issorted)); 1387 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted"); 1388 } 1389 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1390 if (!isequal) { 1391 PetscCall(ISGetLocalSize(iscol_local, &csize_local)); 1392 isidentity = (PetscBool)(mat->cmap->N == csize_local); 1393 if (!isidentity) { 1394 if (call == MAT_REUSE_MATRIX) { 1395 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local)); 1396 PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1397 } else { 1398 PetscCall(ISAllGather(isrow, &isrow_local)); 1399 PetscCall(ISSorted(isrow_local, &issorted)); 1400 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted"); 1401 } 1402 } 1403 } 1404 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1405 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity))); 1406 if (!isequal && !isidentity) { 1407 if (call == MAT_INITIAL_MATRIX) { 1408 IS intersect; 1409 PetscInt ni; 1410 1411 PetscCall(ISIntersect(isrow_local, iscol_local, &intersect)); 1412 PetscCall(ISGetLocalSize(intersect, &ni)); 1413 PetscCall(ISDestroy(&intersect)); 1414 PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni); 1415 } 1416 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE)); 1417 PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1)); 1418 PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN)); 1419 if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1420 else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat)); 1421 else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1422 PetscCall(MatDestroy(C)); 1423 PetscCall(MatDestroy(C + 1)); 1424 } 1425 if (call == MAT_INITIAL_MATRIX) { 1426 if (!isequal && !isidentity) { 1427 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local)); 1428 PetscCall(ISDestroy(&isrow_local)); 1429 } 1430 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1431 PetscCall(ISDestroy(&iscol_local)); 1432 } 1433 PetscFunctionReturn(PETSC_SUCCESS); 1434 } 1435 1436 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1437 { 1438 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1439 1440 PetscFunctionBegin; 1441 PetscCall(MatZeroEntries(l->A)); 1442 PetscCall(MatZeroEntries(l->B)); 1443 PetscFunctionReturn(PETSC_SUCCESS); 1444 } 1445 1446 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1447 { 1448 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1449 Mat A = a->A, B = a->B; 1450 PetscLogDouble isend[5], irecv[5]; 1451 1452 PetscFunctionBegin; 1453 info->block_size = (PetscReal)matin->rmap->bs; 1454 1455 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1456 1457 isend[0] = info->nz_used; 1458 isend[1] = info->nz_allocated; 1459 isend[2] = info->nz_unneeded; 1460 isend[3] = info->memory; 1461 isend[4] = info->mallocs; 1462 1463 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1464 1465 isend[0] += info->nz_used; 1466 isend[1] += info->nz_allocated; 1467 isend[2] += info->nz_unneeded; 1468 isend[3] += info->memory; 1469 isend[4] += info->mallocs; 1470 if (flag == MAT_LOCAL) { 1471 info->nz_used = isend[0]; 1472 info->nz_allocated = isend[1]; 1473 info->nz_unneeded = isend[2]; 1474 info->memory = isend[3]; 1475 info->mallocs = isend[4]; 1476 } else if (flag == MAT_GLOBAL_MAX) { 1477 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1478 1479 info->nz_used = irecv[0]; 1480 info->nz_allocated = irecv[1]; 1481 info->nz_unneeded = irecv[2]; 1482 info->memory = irecv[3]; 1483 info->mallocs = irecv[4]; 1484 } else if (flag == MAT_GLOBAL_SUM) { 1485 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1486 1487 info->nz_used = irecv[0]; 1488 info->nz_allocated = irecv[1]; 1489 info->nz_unneeded = irecv[2]; 1490 info->memory = irecv[3]; 1491 info->mallocs = irecv[4]; 1492 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1493 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1494 info->fill_ratio_needed = 0; 1495 info->factor_mallocs = 0; 1496 PetscFunctionReturn(PETSC_SUCCESS); 1497 } 1498 1499 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1500 { 1501 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1502 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1503 1504 PetscFunctionBegin; 1505 switch (op) { 1506 case MAT_NEW_NONZERO_LOCATIONS: 1507 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1508 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1509 case MAT_KEEP_NONZERO_PATTERN: 1510 case MAT_NEW_NONZERO_LOCATION_ERR: 1511 MatCheckPreallocated(A, 1); 1512 PetscCall(MatSetOption(a->A, op, flg)); 1513 PetscCall(MatSetOption(a->B, op, flg)); 1514 break; 1515 case MAT_ROW_ORIENTED: 1516 MatCheckPreallocated(A, 1); 1517 a->roworiented = flg; 1518 1519 PetscCall(MatSetOption(a->A, op, flg)); 1520 PetscCall(MatSetOption(a->B, op, flg)); 1521 break; 1522 case MAT_IGNORE_OFF_PROC_ENTRIES: 1523 a->donotstash = flg; 1524 break; 1525 case MAT_USE_HASH_TABLE: 1526 a->ht_flag = flg; 1527 break; 1528 case MAT_HERMITIAN: 1529 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1530 #if defined(PETSC_USE_COMPLEX) 1531 if (flg) { /* need different mat-vec ops */ 1532 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1533 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1534 A->ops->multtranspose = NULL; 1535 A->ops->multtransposeadd = NULL; 1536 A->symmetric = PETSC_BOOL3_FALSE; 1537 } 1538 #endif 1539 break; 1540 case MAT_SPD: 1541 case MAT_SYMMETRIC: 1542 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1543 #if defined(PETSC_USE_COMPLEX) 1544 if (flg) { /* restore to use default mat-vec ops */ 1545 A->ops->mult = MatMult_MPISBAIJ; 1546 A->ops->multadd = MatMultAdd_MPISBAIJ; 1547 A->ops->multtranspose = MatMult_MPISBAIJ; 1548 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1549 } 1550 #endif 1551 break; 1552 case MAT_STRUCTURALLY_SYMMETRIC: 1553 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1554 break; 1555 case MAT_IGNORE_LOWER_TRIANGULAR: 1556 case MAT_ERROR_LOWER_TRIANGULAR: 1557 aA->ignore_ltriangular = flg; 1558 break; 1559 case MAT_GETROW_UPPERTRIANGULAR: 1560 aA->getrow_utriangular = flg; 1561 break; 1562 default: 1563 break; 1564 } 1565 PetscFunctionReturn(PETSC_SUCCESS); 1566 } 1567 1568 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1569 { 1570 PetscFunctionBegin; 1571 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1572 if (reuse == MAT_INITIAL_MATRIX) { 1573 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1574 } else if (reuse == MAT_REUSE_MATRIX) { 1575 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1576 } 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1581 { 1582 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1583 Mat a = baij->A, b = baij->B; 1584 PetscInt nv, m, n; 1585 PetscBool flg; 1586 1587 PetscFunctionBegin; 1588 if (ll != rr) { 1589 PetscCall(VecEqual(ll, rr, &flg)); 1590 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1591 } 1592 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1593 1594 PetscCall(MatGetLocalSize(mat, &m, &n)); 1595 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1596 1597 PetscCall(VecGetLocalSize(rr, &nv)); 1598 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1599 1600 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1601 1602 /* left diagonalscale the off-diagonal part */ 1603 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1604 1605 /* scale the diagonal part */ 1606 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1607 1608 /* right diagonalscale the off-diagonal part */ 1609 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1610 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1611 PetscFunctionReturn(PETSC_SUCCESS); 1612 } 1613 1614 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1615 { 1616 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1617 1618 PetscFunctionBegin; 1619 PetscCall(MatSetUnfactored(a->A)); 1620 PetscFunctionReturn(PETSC_SUCCESS); 1621 } 1622 1623 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1624 1625 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1626 { 1627 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1628 Mat a, b, c, d; 1629 PetscBool flg; 1630 1631 PetscFunctionBegin; 1632 a = matA->A; 1633 b = matA->B; 1634 c = matB->A; 1635 d = matB->B; 1636 1637 PetscCall(MatEqual(a, c, &flg)); 1638 if (flg) PetscCall(MatEqual(b, d, &flg)); 1639 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1640 PetscFunctionReturn(PETSC_SUCCESS); 1641 } 1642 1643 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1644 { 1645 PetscBool isbaij; 1646 1647 PetscFunctionBegin; 1648 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1649 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1650 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1651 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1652 PetscCall(MatGetRowUpperTriangular(A)); 1653 PetscCall(MatCopy_Basic(A, B, str)); 1654 PetscCall(MatRestoreRowUpperTriangular(A)); 1655 } else { 1656 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1657 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1658 1659 PetscCall(MatCopy(a->A, b->A, str)); 1660 PetscCall(MatCopy(a->B, b->B, str)); 1661 } 1662 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1663 PetscFunctionReturn(PETSC_SUCCESS); 1664 } 1665 1666 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1667 { 1668 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1669 PetscBLASInt bnz, one = 1; 1670 Mat_SeqSBAIJ *xa, *ya; 1671 Mat_SeqBAIJ *xb, *yb; 1672 1673 PetscFunctionBegin; 1674 if (str == SAME_NONZERO_PATTERN) { 1675 PetscScalar alpha = a; 1676 xa = (Mat_SeqSBAIJ *)xx->A->data; 1677 ya = (Mat_SeqSBAIJ *)yy->A->data; 1678 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1679 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1680 xb = (Mat_SeqBAIJ *)xx->B->data; 1681 yb = (Mat_SeqBAIJ *)yy->B->data; 1682 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1683 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1684 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1685 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1686 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1687 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1688 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1689 } else { 1690 Mat B; 1691 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1692 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1693 PetscCall(MatGetRowUpperTriangular(X)); 1694 PetscCall(MatGetRowUpperTriangular(Y)); 1695 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1696 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1697 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1698 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1699 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1700 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1701 PetscCall(MatSetType(B, MATMPISBAIJ)); 1702 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1703 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1704 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1705 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1706 PetscCall(MatHeaderMerge(Y, &B)); 1707 PetscCall(PetscFree(nnz_d)); 1708 PetscCall(PetscFree(nnz_o)); 1709 PetscCall(MatRestoreRowUpperTriangular(X)); 1710 PetscCall(MatRestoreRowUpperTriangular(Y)); 1711 } 1712 PetscFunctionReturn(PETSC_SUCCESS); 1713 } 1714 1715 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1716 { 1717 PetscInt i; 1718 PetscBool flg; 1719 1720 PetscFunctionBegin; 1721 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1722 for (i = 0; i < n; i++) { 1723 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1724 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1725 } 1726 PetscFunctionReturn(PETSC_SUCCESS); 1727 } 1728 1729 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1730 { 1731 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1732 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1733 1734 PetscFunctionBegin; 1735 if (!Y->preallocated) { 1736 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1737 } else if (!aij->nz) { 1738 PetscInt nonew = aij->nonew; 1739 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1740 aij->nonew = nonew; 1741 } 1742 PetscCall(MatShift_Basic(Y, a)); 1743 PetscFunctionReturn(PETSC_SUCCESS); 1744 } 1745 1746 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1747 { 1748 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1749 1750 PetscFunctionBegin; 1751 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1752 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1753 if (d) { 1754 PetscInt rstart; 1755 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1756 *d += rstart / A->rmap->bs; 1757 } 1758 PetscFunctionReturn(PETSC_SUCCESS); 1759 } 1760 1761 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1762 { 1763 PetscFunctionBegin; 1764 *a = ((Mat_MPISBAIJ *)A->data)->A; 1765 PetscFunctionReturn(PETSC_SUCCESS); 1766 } 1767 1768 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1769 { 1770 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1771 1772 PetscFunctionBegin; 1773 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1774 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1775 PetscFunctionReturn(PETSC_SUCCESS); 1776 } 1777 1778 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1779 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1780 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1781 1782 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1783 MatGetRow_MPISBAIJ, 1784 MatRestoreRow_MPISBAIJ, 1785 MatMult_MPISBAIJ, 1786 /* 4*/ MatMultAdd_MPISBAIJ, 1787 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1788 MatMultAdd_MPISBAIJ, 1789 NULL, 1790 NULL, 1791 NULL, 1792 /* 10*/ NULL, 1793 NULL, 1794 NULL, 1795 MatSOR_MPISBAIJ, 1796 MatTranspose_MPISBAIJ, 1797 /* 15*/ MatGetInfo_MPISBAIJ, 1798 MatEqual_MPISBAIJ, 1799 MatGetDiagonal_MPISBAIJ, 1800 MatDiagonalScale_MPISBAIJ, 1801 MatNorm_MPISBAIJ, 1802 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1803 MatAssemblyEnd_MPISBAIJ, 1804 MatSetOption_MPISBAIJ, 1805 MatZeroEntries_MPISBAIJ, 1806 /* 24*/ NULL, 1807 NULL, 1808 NULL, 1809 NULL, 1810 NULL, 1811 /* 29*/ MatSetUp_MPI_Hash, 1812 NULL, 1813 NULL, 1814 MatGetDiagonalBlock_MPISBAIJ, 1815 NULL, 1816 /* 34*/ MatDuplicate_MPISBAIJ, 1817 NULL, 1818 NULL, 1819 NULL, 1820 NULL, 1821 /* 39*/ MatAXPY_MPISBAIJ, 1822 MatCreateSubMatrices_MPISBAIJ, 1823 MatIncreaseOverlap_MPISBAIJ, 1824 MatGetValues_MPISBAIJ, 1825 MatCopy_MPISBAIJ, 1826 /* 44*/ NULL, 1827 MatScale_MPISBAIJ, 1828 MatShift_MPISBAIJ, 1829 NULL, 1830 NULL, 1831 /* 49*/ NULL, 1832 NULL, 1833 NULL, 1834 NULL, 1835 NULL, 1836 /* 54*/ NULL, 1837 NULL, 1838 MatSetUnfactored_MPISBAIJ, 1839 NULL, 1840 MatSetValuesBlocked_MPISBAIJ, 1841 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1842 NULL, 1843 NULL, 1844 NULL, 1845 NULL, 1846 /* 64*/ NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 MatGetRowMaxAbs_MPISBAIJ, 1851 /* 69*/ NULL, 1852 MatConvert_MPISBAIJ_Basic, 1853 NULL, 1854 NULL, 1855 NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 NULL, 1860 MatLoad_MPISBAIJ, 1861 /* 79*/ NULL, 1862 NULL, 1863 NULL, 1864 NULL, 1865 NULL, 1866 /* 84*/ NULL, 1867 NULL, 1868 NULL, 1869 NULL, 1870 NULL, 1871 /* 89*/ NULL, 1872 NULL, 1873 NULL, 1874 NULL, 1875 MatConjugate_MPISBAIJ, 1876 /* 94*/ NULL, 1877 NULL, 1878 MatRealPart_MPISBAIJ, 1879 MatImaginaryPart_MPISBAIJ, 1880 MatGetRowUpperTriangular_MPISBAIJ, 1881 /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ, 1882 NULL, 1883 NULL, 1884 NULL, 1885 NULL, 1886 /*104*/ MatMissingDiagonal_MPISBAIJ, 1887 NULL, 1888 NULL, 1889 NULL, 1890 NULL, 1891 /*109*/ NULL, 1892 NULL, 1893 NULL, 1894 NULL, 1895 NULL, 1896 /*114*/ NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 NULL, 1901 /*119*/ NULL, 1902 NULL, 1903 NULL, 1904 NULL, 1905 NULL, 1906 /*124*/ NULL, 1907 NULL, 1908 NULL, 1909 MatSetBlockSizes_Default, 1910 NULL, 1911 /*129*/ NULL, 1912 NULL, 1913 MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1914 NULL, 1915 NULL, 1916 /*134*/ NULL, 1917 NULL, 1918 NULL, 1919 MatEliminateZeros_MPISBAIJ, 1920 NULL, 1921 /*139*/ NULL, 1922 NULL, 1923 NULL, 1924 MatCopyHashToXAIJ_MPI_Hash, 1925 NULL}; 1926 1927 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1928 { 1929 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1930 PetscInt i, mbs, Mbs; 1931 PetscMPIInt size; 1932 1933 PetscFunctionBegin; 1934 if (B->hash_active) { 1935 B->ops[0] = b->cops; 1936 B->hash_active = PETSC_FALSE; 1937 } 1938 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1939 PetscCall(MatSetBlockSize(B, bs)); 1940 PetscCall(PetscLayoutSetUp(B->rmap)); 1941 PetscCall(PetscLayoutSetUp(B->cmap)); 1942 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1943 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1944 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1945 1946 mbs = B->rmap->n / bs; 1947 Mbs = B->rmap->N / bs; 1948 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1949 1950 B->rmap->bs = bs; 1951 b->bs2 = bs * bs; 1952 b->mbs = mbs; 1953 b->Mbs = Mbs; 1954 b->nbs = B->cmap->n / bs; 1955 b->Nbs = B->cmap->N / bs; 1956 1957 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1958 b->rstartbs = B->rmap->rstart / bs; 1959 b->rendbs = B->rmap->rend / bs; 1960 1961 b->cstartbs = B->cmap->rstart / bs; 1962 b->cendbs = B->cmap->rend / bs; 1963 1964 #if defined(PETSC_USE_CTABLE) 1965 PetscCall(PetscHMapIDestroy(&b->colmap)); 1966 #else 1967 PetscCall(PetscFree(b->colmap)); 1968 #endif 1969 PetscCall(PetscFree(b->garray)); 1970 PetscCall(VecDestroy(&b->lvec)); 1971 PetscCall(VecScatterDestroy(&b->Mvctx)); 1972 PetscCall(VecDestroy(&b->slvec0)); 1973 PetscCall(VecDestroy(&b->slvec0b)); 1974 PetscCall(VecDestroy(&b->slvec1)); 1975 PetscCall(VecDestroy(&b->slvec1a)); 1976 PetscCall(VecDestroy(&b->slvec1b)); 1977 PetscCall(VecScatterDestroy(&b->sMvctx)); 1978 1979 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1980 1981 MatSeqXAIJGetOptions_Private(b->B); 1982 PetscCall(MatDestroy(&b->B)); 1983 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1984 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1985 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1986 MatSeqXAIJRestoreOptions_Private(b->B); 1987 1988 MatSeqXAIJGetOptions_Private(b->A); 1989 PetscCall(MatDestroy(&b->A)); 1990 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1991 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1992 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1993 MatSeqXAIJRestoreOptions_Private(b->A); 1994 1995 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1996 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1997 1998 B->preallocated = PETSC_TRUE; 1999 B->was_assembled = PETSC_FALSE; 2000 B->assembled = PETSC_FALSE; 2001 PetscFunctionReturn(PETSC_SUCCESS); 2002 } 2003 2004 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2005 { 2006 PetscInt m, rstart, cend; 2007 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2008 const PetscInt *JJ = NULL; 2009 PetscScalar *values = NULL; 2010 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2011 PetscBool nooffprocentries; 2012 2013 PetscFunctionBegin; 2014 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2015 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2016 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2017 PetscCall(PetscLayoutSetUp(B->rmap)); 2018 PetscCall(PetscLayoutSetUp(B->cmap)); 2019 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2020 m = B->rmap->n / bs; 2021 rstart = B->rmap->rstart / bs; 2022 cend = B->cmap->rend / bs; 2023 2024 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2025 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2026 for (i = 0; i < m; i++) { 2027 nz = ii[i + 1] - ii[i]; 2028 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2029 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2030 JJ = jj + ii[i]; 2031 bd = 0; 2032 for (j = 0; j < nz; j++) { 2033 if (*JJ >= i + rstart) break; 2034 JJ++; 2035 bd++; 2036 } 2037 d = 0; 2038 for (; j < nz; j++) { 2039 if (*JJ++ >= cend) break; 2040 d++; 2041 } 2042 d_nnz[i] = d; 2043 o_nnz[i] = nz - d - bd; 2044 nz = nz - bd; 2045 nz_max = PetscMax(nz_max, nz); 2046 } 2047 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2048 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2049 PetscCall(PetscFree2(d_nnz, o_nnz)); 2050 2051 values = (PetscScalar *)V; 2052 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2053 for (i = 0; i < m; i++) { 2054 PetscInt row = i + rstart; 2055 PetscInt ncols = ii[i + 1] - ii[i]; 2056 const PetscInt *icols = jj + ii[i]; 2057 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2058 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2059 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2060 } else { /* block ordering does not match so we can only insert one block at a time. */ 2061 PetscInt j; 2062 for (j = 0; j < ncols; j++) { 2063 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2064 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2065 } 2066 } 2067 } 2068 2069 if (!V) PetscCall(PetscFree(values)); 2070 nooffprocentries = B->nooffprocentries; 2071 B->nooffprocentries = PETSC_TRUE; 2072 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2073 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2074 B->nooffprocentries = nooffprocentries; 2075 2076 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 /*MC 2081 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2082 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2083 the matrix is stored. 2084 2085 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2086 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2087 2088 Options Database Key: 2089 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2090 2091 Level: beginner 2092 2093 Note: 2094 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2095 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2096 2097 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2098 M*/ 2099 2100 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2101 { 2102 Mat_MPISBAIJ *b; 2103 PetscBool flg = PETSC_FALSE; 2104 2105 PetscFunctionBegin; 2106 PetscCall(PetscNew(&b)); 2107 B->data = (void *)b; 2108 B->ops[0] = MatOps_Values; 2109 2110 B->ops->destroy = MatDestroy_MPISBAIJ; 2111 B->ops->view = MatView_MPISBAIJ; 2112 B->assembled = PETSC_FALSE; 2113 B->insertmode = NOT_SET_VALUES; 2114 2115 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2116 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2117 2118 /* build local table of row and column ownerships */ 2119 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2120 2121 /* build cache for off array entries formed */ 2122 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2123 2124 b->donotstash = PETSC_FALSE; 2125 b->colmap = NULL; 2126 b->garray = NULL; 2127 b->roworiented = PETSC_TRUE; 2128 2129 /* stuff used in block assembly */ 2130 b->barray = NULL; 2131 2132 /* stuff used for matrix vector multiply */ 2133 b->lvec = NULL; 2134 b->Mvctx = NULL; 2135 b->slvec0 = NULL; 2136 b->slvec0b = NULL; 2137 b->slvec1 = NULL; 2138 b->slvec1a = NULL; 2139 b->slvec1b = NULL; 2140 b->sMvctx = NULL; 2141 2142 /* stuff for MatGetRow() */ 2143 b->rowindices = NULL; 2144 b->rowvalues = NULL; 2145 b->getrowactive = PETSC_FALSE; 2146 2147 /* hash table stuff */ 2148 b->ht = NULL; 2149 b->hd = NULL; 2150 b->ht_size = 0; 2151 b->ht_flag = PETSC_FALSE; 2152 b->ht_fact = 0; 2153 b->ht_total_ct = 0; 2154 b->ht_insert_ct = 0; 2155 2156 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2157 b->ijonly = PETSC_FALSE; 2158 2159 b->in_loc = NULL; 2160 b->v_loc = NULL; 2161 b->n_loc = 0; 2162 2163 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2164 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2165 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2166 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2167 #if defined(PETSC_HAVE_ELEMENTAL) 2168 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2169 #endif 2170 #if defined(PETSC_HAVE_SCALAPACK) 2171 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2172 #endif 2173 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2174 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2175 2176 B->symmetric = PETSC_BOOL3_TRUE; 2177 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2178 B->symmetry_eternal = PETSC_TRUE; 2179 B->structural_symmetry_eternal = PETSC_TRUE; 2180 #if defined(PETSC_USE_COMPLEX) 2181 B->hermitian = PETSC_BOOL3_FALSE; 2182 #else 2183 B->hermitian = PETSC_BOOL3_TRUE; 2184 #endif 2185 2186 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2187 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2188 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2189 if (flg) { 2190 PetscReal fact = 1.39; 2191 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2192 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2193 if (fact <= 1.0) fact = 1.39; 2194 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2195 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2196 } 2197 PetscOptionsEnd(); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2202 /*MC 2203 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2204 2205 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2206 and `MATMPISBAIJ` otherwise. 2207 2208 Options Database Key: 2209 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2210 2211 Level: beginner 2212 2213 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2214 M*/ 2215 2216 /*@ 2217 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2218 the user should preallocate the matrix storage by setting the parameters 2219 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2220 performance can be increased by more than a factor of 50. 2221 2222 Collective 2223 2224 Input Parameters: 2225 + B - the matrix 2226 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2227 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2228 . d_nz - number of block nonzeros per block row in diagonal portion of local 2229 submatrix (same for all local rows) 2230 . d_nnz - array containing the number of block nonzeros in the various block rows 2231 in the upper triangular and diagonal part of the in diagonal portion of the local 2232 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2233 for the diagonal entry and set a value even if it is zero. 2234 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2235 submatrix (same for all local rows). 2236 - o_nnz - array containing the number of nonzeros in the various block rows of the 2237 off-diagonal portion of the local submatrix that is right of the diagonal 2238 (possibly different for each block row) or `NULL`. 2239 2240 Options Database Keys: 2241 + -mat_no_unroll - uses code that does not unroll the loops in the 2242 block calculations (much slower) 2243 - -mat_block_size - size of the blocks to use 2244 2245 Level: intermediate 2246 2247 Notes: 2248 2249 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2250 than it must be used on all processors that share the object for that argument. 2251 2252 If the *_nnz parameter is given then the *_nz parameter is ignored 2253 2254 Storage Information: 2255 For a square global matrix we define each processor's diagonal portion 2256 to be its local rows and the corresponding columns (a square submatrix); 2257 each processor's off-diagonal portion encompasses the remainder of the 2258 local matrix (a rectangular submatrix). 2259 2260 The user can specify preallocated storage for the diagonal part of 2261 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2262 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2263 memory allocation. Likewise, specify preallocated storage for the 2264 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2265 2266 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2267 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2268 You can also run with the option `-info` and look for messages with the string 2269 malloc in them to see if additional memory allocation was needed. 2270 2271 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2272 the figure below we depict these three local rows and all columns (0-11). 2273 2274 .vb 2275 0 1 2 3 4 5 6 7 8 9 10 11 2276 -------------------------- 2277 row 3 |. . . d d d o o o o o o 2278 row 4 |. . . d d d o o o o o o 2279 row 5 |. . . d d d o o o o o o 2280 -------------------------- 2281 .ve 2282 2283 Thus, any entries in the d locations are stored in the d (diagonal) 2284 submatrix, and any entries in the o locations are stored in the 2285 o (off-diagonal) submatrix. Note that the d matrix is stored in 2286 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2287 2288 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2289 plus the diagonal part of the d matrix, 2290 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2291 2292 In general, for PDE problems in which most nonzeros are near the diagonal, 2293 one expects `d_nz` >> `o_nz`. 2294 2295 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2296 @*/ 2297 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2298 { 2299 PetscFunctionBegin; 2300 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2301 PetscValidType(B, 1); 2302 PetscValidLogicalCollectiveInt(B, bs, 2); 2303 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2304 PetscFunctionReturn(PETSC_SUCCESS); 2305 } 2306 2307 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2308 /*@ 2309 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2310 (block compressed row). For good matrix assembly performance 2311 the user should preallocate the matrix storage by setting the parameters 2312 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2313 2314 Collective 2315 2316 Input Parameters: 2317 + comm - MPI communicator 2318 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2319 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2320 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2321 This value should be the same as the local size used in creating the 2322 y vector for the matrix-vector product y = Ax. 2323 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2324 This value should be the same as the local size used in creating the 2325 x vector for the matrix-vector product y = Ax. 2326 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2327 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2328 . d_nz - number of block nonzeros per block row in diagonal portion of local 2329 submatrix (same for all local rows) 2330 . d_nnz - array containing the number of block nonzeros in the various block rows 2331 in the upper triangular portion of the in diagonal portion of the local 2332 (possibly different for each block block row) or `NULL`. 2333 If you plan to factor the matrix you must leave room for the diagonal entry and 2334 set its value even if it is zero. 2335 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2336 submatrix (same for all local rows). 2337 - o_nnz - array containing the number of nonzeros in the various block rows of the 2338 off-diagonal portion of the local submatrix (possibly different for 2339 each block row) or `NULL`. 2340 2341 Output Parameter: 2342 . A - the matrix 2343 2344 Options Database Keys: 2345 + -mat_no_unroll - uses code that does not unroll the loops in the 2346 block calculations (much slower) 2347 . -mat_block_size - size of the blocks to use 2348 - -mat_mpi - use the parallel matrix data structures even on one processor 2349 (defaults to using SeqBAIJ format on one processor) 2350 2351 Level: intermediate 2352 2353 Notes: 2354 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2355 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2356 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2357 2358 The number of rows and columns must be divisible by blocksize. 2359 This matrix type does not support complex Hermitian operation. 2360 2361 The user MUST specify either the local or global matrix dimensions 2362 (possibly both). 2363 2364 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2365 than it must be used on all processors that share the object for that argument. 2366 2367 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by 2368 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2369 2370 If the *_nnz parameter is given then the *_nz parameter is ignored 2371 2372 Storage Information: 2373 For a square global matrix we define each processor's diagonal portion 2374 to be its local rows and the corresponding columns (a square submatrix); 2375 each processor's off-diagonal portion encompasses the remainder of the 2376 local matrix (a rectangular submatrix). 2377 2378 The user can specify preallocated storage for the diagonal part of 2379 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2380 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2381 memory allocation. Likewise, specify preallocated storage for the 2382 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2383 2384 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2385 the figure below we depict these three local rows and all columns (0-11). 2386 2387 .vb 2388 0 1 2 3 4 5 6 7 8 9 10 11 2389 -------------------------- 2390 row 3 |. . . d d d o o o o o o 2391 row 4 |. . . d d d o o o o o o 2392 row 5 |. . . d d d o o o o o o 2393 -------------------------- 2394 .ve 2395 2396 Thus, any entries in the d locations are stored in the d (diagonal) 2397 submatrix, and any entries in the o locations are stored in the 2398 o (off-diagonal) submatrix. Note that the d matrix is stored in 2399 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2400 2401 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2402 plus the diagonal part of the d matrix, 2403 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2404 In general, for PDE problems in which most nonzeros are near the diagonal, 2405 one expects `d_nz` >> `o_nz`. 2406 2407 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2408 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2409 @*/ 2410 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2411 { 2412 PetscMPIInt size; 2413 2414 PetscFunctionBegin; 2415 PetscCall(MatCreate(comm, A)); 2416 PetscCall(MatSetSizes(*A, m, n, M, N)); 2417 PetscCallMPI(MPI_Comm_size(comm, &size)); 2418 if (size > 1) { 2419 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2420 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2421 } else { 2422 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2423 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2424 } 2425 PetscFunctionReturn(PETSC_SUCCESS); 2426 } 2427 2428 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2429 { 2430 Mat mat; 2431 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2432 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2433 PetscScalar *array; 2434 2435 PetscFunctionBegin; 2436 *newmat = NULL; 2437 2438 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2439 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2440 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2441 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2442 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2443 2444 if (matin->hash_active) { 2445 PetscCall(MatSetUp(mat)); 2446 } else { 2447 mat->factortype = matin->factortype; 2448 mat->preallocated = PETSC_TRUE; 2449 mat->assembled = PETSC_TRUE; 2450 mat->insertmode = NOT_SET_VALUES; 2451 2452 a = (Mat_MPISBAIJ *)mat->data; 2453 a->bs2 = oldmat->bs2; 2454 a->mbs = oldmat->mbs; 2455 a->nbs = oldmat->nbs; 2456 a->Mbs = oldmat->Mbs; 2457 a->Nbs = oldmat->Nbs; 2458 2459 a->size = oldmat->size; 2460 a->rank = oldmat->rank; 2461 a->donotstash = oldmat->donotstash; 2462 a->roworiented = oldmat->roworiented; 2463 a->rowindices = NULL; 2464 a->rowvalues = NULL; 2465 a->getrowactive = PETSC_FALSE; 2466 a->barray = NULL; 2467 a->rstartbs = oldmat->rstartbs; 2468 a->rendbs = oldmat->rendbs; 2469 a->cstartbs = oldmat->cstartbs; 2470 a->cendbs = oldmat->cendbs; 2471 2472 /* hash table stuff */ 2473 a->ht = NULL; 2474 a->hd = NULL; 2475 a->ht_size = 0; 2476 a->ht_flag = oldmat->ht_flag; 2477 a->ht_fact = oldmat->ht_fact; 2478 a->ht_total_ct = 0; 2479 a->ht_insert_ct = 0; 2480 2481 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2482 if (oldmat->colmap) { 2483 #if defined(PETSC_USE_CTABLE) 2484 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2485 #else 2486 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2487 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2488 #endif 2489 } else a->colmap = NULL; 2490 2491 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2492 PetscCall(PetscMalloc1(len, &a->garray)); 2493 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2494 } else a->garray = NULL; 2495 2496 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2497 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2498 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2499 2500 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2501 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2502 2503 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2504 PetscCall(VecGetArray(a->slvec1, &array)); 2505 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2506 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2507 PetscCall(VecRestoreArray(a->slvec1, &array)); 2508 PetscCall(VecGetArray(a->slvec0, &array)); 2509 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2510 PetscCall(VecRestoreArray(a->slvec0, &array)); 2511 2512 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2513 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2514 a->sMvctx = oldmat->sMvctx; 2515 2516 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2517 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2518 } 2519 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2520 *newmat = mat; 2521 PetscFunctionReturn(PETSC_SUCCESS); 2522 } 2523 2524 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2525 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2526 2527 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2528 { 2529 PetscBool isbinary; 2530 2531 PetscFunctionBegin; 2532 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2533 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2534 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2535 PetscFunctionReturn(PETSC_SUCCESS); 2536 } 2537 2538 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2539 { 2540 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2541 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2542 PetscReal atmp; 2543 PetscReal *work, *svalues, *rvalues; 2544 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2545 PetscMPIInt rank, size; 2546 PetscInt *rowners_bs, count, source; 2547 PetscScalar *va; 2548 MatScalar *ba; 2549 MPI_Status stat; 2550 2551 PetscFunctionBegin; 2552 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2553 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2554 PetscCall(VecGetArray(v, &va)); 2555 2556 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2557 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2558 2559 bs = A->rmap->bs; 2560 mbs = a->mbs; 2561 Mbs = a->Mbs; 2562 ba = b->a; 2563 bi = b->i; 2564 bj = b->j; 2565 2566 /* find ownerships */ 2567 rowners_bs = A->rmap->range; 2568 2569 /* each proc creates an array to be distributed */ 2570 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2571 2572 /* row_max for B */ 2573 if (rank != size - 1) { 2574 for (i = 0; i < mbs; i++) { 2575 ncols = bi[1] - bi[0]; 2576 bi++; 2577 brow = bs * i; 2578 for (j = 0; j < ncols; j++) { 2579 bcol = bs * (*bj); 2580 for (kcol = 0; kcol < bs; kcol++) { 2581 col = bcol + kcol; /* local col index */ 2582 col += rowners_bs[rank + 1]; /* global col index */ 2583 for (krow = 0; krow < bs; krow++) { 2584 atmp = PetscAbsScalar(*ba); 2585 ba++; 2586 row = brow + krow; /* local row index */ 2587 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2588 if (work[col] < atmp) work[col] = atmp; 2589 } 2590 } 2591 bj++; 2592 } 2593 } 2594 2595 /* send values to its owners */ 2596 for (PetscMPIInt dest = rank + 1; dest < size; dest++) { 2597 svalues = work + rowners_bs[dest]; 2598 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2599 PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2600 } 2601 } 2602 2603 /* receive values */ 2604 if (rank) { 2605 rvalues = work; 2606 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2607 for (source = 0; source < rank; source++) { 2608 PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2609 /* process values */ 2610 for (i = 0; i < count; i++) { 2611 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2612 } 2613 } 2614 } 2615 2616 PetscCall(VecRestoreArray(v, &va)); 2617 PetscCall(PetscFree(work)); 2618 PetscFunctionReturn(PETSC_SUCCESS); 2619 } 2620 2621 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2622 { 2623 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2624 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2625 PetscScalar *x, *ptr, *from; 2626 Vec bb1; 2627 const PetscScalar *b; 2628 2629 PetscFunctionBegin; 2630 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2631 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2632 2633 if (flag == SOR_APPLY_UPPER) { 2634 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2635 PetscFunctionReturn(PETSC_SUCCESS); 2636 } 2637 2638 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2639 if (flag & SOR_ZERO_INITIAL_GUESS) { 2640 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2641 its--; 2642 } 2643 2644 PetscCall(VecDuplicate(bb, &bb1)); 2645 while (its--) { 2646 /* lower triangular part: slvec0b = - B^T*xx */ 2647 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2648 2649 /* copy xx into slvec0a */ 2650 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2651 PetscCall(VecGetArray(xx, &x)); 2652 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2653 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2654 2655 PetscCall(VecScale(mat->slvec0, -1.0)); 2656 2657 /* copy bb into slvec1a */ 2658 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2659 PetscCall(VecGetArrayRead(bb, &b)); 2660 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2661 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2662 2663 /* set slvec1b = 0 */ 2664 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2665 PetscCall(VecZeroEntries(mat->slvec1b)); 2666 2667 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2668 PetscCall(VecRestoreArray(xx, &x)); 2669 PetscCall(VecRestoreArrayRead(bb, &b)); 2670 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2671 2672 /* upper triangular part: bb1 = bb1 - B*x */ 2673 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2674 2675 /* local diagonal sweep */ 2676 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2677 } 2678 PetscCall(VecDestroy(&bb1)); 2679 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2680 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2681 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2682 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2683 } else if (flag & SOR_EISENSTAT) { 2684 Vec xx1; 2685 PetscBool hasop; 2686 const PetscScalar *diag; 2687 PetscScalar *sl, scale = (omega - 2.0) / omega; 2688 PetscInt i, n; 2689 2690 if (!mat->xx1) { 2691 PetscCall(VecDuplicate(bb, &mat->xx1)); 2692 PetscCall(VecDuplicate(bb, &mat->bb1)); 2693 } 2694 xx1 = mat->xx1; 2695 bb1 = mat->bb1; 2696 2697 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2698 2699 if (!mat->diag) { 2700 /* this is wrong for same matrix with new nonzero values */ 2701 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2702 PetscCall(MatGetDiagonal(matin, mat->diag)); 2703 } 2704 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2705 2706 if (hasop) { 2707 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2708 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2709 } else { 2710 /* 2711 These two lines are replaced by code that may be a bit faster for a good compiler 2712 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2713 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2714 */ 2715 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2716 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2717 PetscCall(VecGetArrayRead(bb, &b)); 2718 PetscCall(VecGetArray(xx, &x)); 2719 PetscCall(VecGetLocalSize(xx, &n)); 2720 if (omega == 1.0) { 2721 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2722 PetscCall(PetscLogFlops(2.0 * n)); 2723 } else { 2724 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2725 PetscCall(PetscLogFlops(3.0 * n)); 2726 } 2727 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2728 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2729 PetscCall(VecRestoreArrayRead(bb, &b)); 2730 PetscCall(VecRestoreArray(xx, &x)); 2731 } 2732 2733 /* multiply off-diagonal portion of matrix */ 2734 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2735 PetscCall(VecZeroEntries(mat->slvec1b)); 2736 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2737 PetscCall(VecGetArray(mat->slvec0, &from)); 2738 PetscCall(VecGetArray(xx, &x)); 2739 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2740 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2741 PetscCall(VecRestoreArray(xx, &x)); 2742 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2743 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2744 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2745 2746 /* local sweep */ 2747 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2748 PetscCall(VecAXPY(xx, 1.0, xx1)); 2749 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2750 PetscFunctionReturn(PETSC_SUCCESS); 2751 } 2752 2753 /*@ 2754 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2755 2756 Collective 2757 2758 Input Parameters: 2759 + comm - MPI communicator 2760 . bs - the block size, only a block size of 1 is supported 2761 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2762 . n - This value should be the same as the local size used in creating the 2763 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2764 calculated if `N` is given) For square matrices `n` is almost always `m`. 2765 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2766 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2767 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2768 . j - column indices 2769 - a - matrix values 2770 2771 Output Parameter: 2772 . mat - the matrix 2773 2774 Level: intermediate 2775 2776 Notes: 2777 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2778 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2779 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2780 2781 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2782 2783 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2784 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2785 @*/ 2786 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2787 { 2788 PetscFunctionBegin; 2789 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2790 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2791 PetscCall(MatCreate(comm, mat)); 2792 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2793 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2794 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2795 PetscFunctionReturn(PETSC_SUCCESS); 2796 } 2797 2798 /*@ 2799 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2800 2801 Collective 2802 2803 Input Parameters: 2804 + B - the matrix 2805 . bs - the block size 2806 . i - the indices into `j` for the start of each local row (indices start with zero) 2807 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2808 - v - optional values in the matrix, pass `NULL` if not provided 2809 2810 Level: advanced 2811 2812 Notes: 2813 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2814 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2815 called this routine. 2816 2817 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2818 and usually the numerical values as well 2819 2820 Any entries passed in that are below the diagonal are ignored 2821 2822 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2823 `MatCreateMPISBAIJWithArrays()` 2824 @*/ 2825 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2826 { 2827 PetscFunctionBegin; 2828 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2829 PetscFunctionReturn(PETSC_SUCCESS); 2830 } 2831 2832 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2833 { 2834 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2835 PetscInt *indx; 2836 PetscScalar *values; 2837 2838 PetscFunctionBegin; 2839 PetscCall(MatGetSize(inmat, &m, &N)); 2840 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2841 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2842 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2843 PetscInt *bindx, rmax = a->rmax, j; 2844 PetscMPIInt rank, size; 2845 2846 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2847 mbs = m / bs; 2848 Nbs = N / cbs; 2849 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2850 nbs = n / cbs; 2851 2852 PetscCall(PetscMalloc1(rmax, &bindx)); 2853 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2854 2855 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2856 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2857 if (rank == size - 1) { 2858 /* Check sum(nbs) = Nbs */ 2859 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2860 } 2861 2862 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2863 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2864 for (i = 0; i < mbs; i++) { 2865 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2866 nnz = nnz / bs; 2867 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2868 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2869 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2870 } 2871 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2872 PetscCall(PetscFree(bindx)); 2873 2874 PetscCall(MatCreate(comm, outmat)); 2875 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2876 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2877 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2878 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2879 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2880 MatPreallocateEnd(dnz, onz); 2881 } 2882 2883 /* numeric phase */ 2884 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2885 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2886 2887 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2888 for (i = 0; i < m; i++) { 2889 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2890 Ii = i + rstart; 2891 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2892 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2893 } 2894 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2895 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2896 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2897 PetscFunctionReturn(PETSC_SUCCESS); 2898 } 2899