1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 PetscCall(MatRestoreRowUpperTriangular(A)); 141 } else { 142 PetscCall(MatSetUp(B)); 143 } 144 } else { 145 B = *newmat; 146 PetscCall(MatZeroEntries(B)); 147 } 148 149 PetscCall(MatGetRowUpperTriangular(A)); 150 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 151 PetscInt ncols; 152 const PetscInt *row; 153 const PetscScalar *vals; 154 155 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 156 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 157 #if defined(PETSC_USE_COMPLEX) 158 if (A->hermitian == PETSC_BOOL3_TRUE) { 159 PetscInt i; 160 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 161 } else { 162 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 163 } 164 #else 165 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 166 #endif 167 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 168 } 169 PetscCall(MatRestoreRowUpperTriangular(A)); 170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 172 173 if (reuse == MAT_INPLACE_MATRIX) { 174 PetscCall(MatHeaderReplace(A, &B)); 175 } else { 176 *newmat = B; 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 182 { 183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 184 185 PetscFunctionBegin; 186 PetscCall(MatStoreValues(aij->A)); 187 PetscCall(MatStoreValues(aij->B)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 192 { 193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 194 195 PetscFunctionBegin; 196 PetscCall(MatRetrieveValues(aij->A)); 197 PetscCall(MatRetrieveValues(aij->B)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 202 do { \ 203 brow = row / bs; \ 204 rp = aj + ai[brow]; \ 205 ap = aa + bs2 * ai[brow]; \ 206 rmax = aimax[brow]; \ 207 nrow = ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 a_noinsert:; \ 238 ailen[brow] = nrow; \ 239 } while (0) 240 241 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 242 do { \ 243 brow = row / bs; \ 244 rp = bj + bi[brow]; \ 245 ap = ba + bs2 * bi[brow]; \ 246 rmax = bimax[brow]; \ 247 nrow = bilen[brow]; \ 248 bcol = col / bs; \ 249 ridx = row % bs; \ 250 cidx = col % bs; \ 251 low = 0; \ 252 high = nrow; \ 253 while (high - low > 3) { \ 254 t = (low + high) / 2; \ 255 if (rp[t] > bcol) high = t; \ 256 else low = t; \ 257 } \ 258 for (_i = low; _i < high; _i++) { \ 259 if (rp[_i] > bcol) break; \ 260 if (rp[_i] == bcol) { \ 261 bap = ap + bs2 * _i + bs * cidx + ridx; \ 262 if (addv == ADD_VALUES) *bap += value; \ 263 else *bap = value; \ 264 goto b_noinsert; \ 265 } \ 266 } \ 267 if (b->nonew == 1) goto b_noinsert; \ 268 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 269 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 270 N = nrow++ - 1; \ 271 /* shift up all the later entries in this row */ \ 272 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 273 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 274 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 275 rp[_i] = bcol; \ 276 ap[bs2 * _i + bs * cidx + ridx] = value; \ 277 b_noinsert:; \ 278 bilen[brow] = nrow; \ 279 } while (0) 280 281 /* Only add/insert a(i,j) with i<=j (blocks). 282 Any a(i,j) with i>j input by user is ignored or generates an error 283 */ 284 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 285 { 286 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 287 MatScalar value; 288 PetscBool roworiented = baij->roworiented; 289 PetscInt i, j, row, col; 290 PetscInt rstart_orig = mat->rmap->rstart; 291 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 292 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 293 294 /* Some Variables required in the macro */ 295 Mat A = baij->A; 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 297 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 298 MatScalar *aa = a->a; 299 300 Mat B = baij->B; 301 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 302 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 303 MatScalar *ba = b->a; 304 305 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 306 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 307 MatScalar *ap, *bap; 308 309 /* for stash */ 310 PetscInt n_loc, *in_loc = NULL; 311 MatScalar *v_loc = NULL; 312 313 PetscFunctionBegin; 314 if (!baij->donotstash) { 315 if (n > baij->n_loc) { 316 PetscCall(PetscFree(baij->in_loc)); 317 PetscCall(PetscFree(baij->v_loc)); 318 PetscCall(PetscMalloc1(n, &baij->in_loc)); 319 PetscCall(PetscMalloc1(n, &baij->v_loc)); 320 321 baij->n_loc = n; 322 } 323 in_loc = baij->in_loc; 324 v_loc = baij->v_loc; 325 } 326 327 for (i = 0; i < m; i++) { 328 if (im[i] < 0) continue; 329 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 330 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 331 row = im[i] - rstart_orig; /* local row index */ 332 for (j = 0; j < n; j++) { 333 if (im[i] / bs > in[j] / bs) { 334 PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 335 continue; /* ignore lower triangular blocks */ 336 } 337 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 338 col = in[j] - cstart_orig; /* local col index */ 339 brow = row / bs; 340 bcol = col / bs; 341 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 342 if (roworiented) value = v[i * n + j]; 343 else value = v[i + j * m]; 344 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 345 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 346 } else if (in[j] < 0) { 347 continue; 348 } else { 349 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 350 /* off-diag entry (B) */ 351 if (mat->was_assembled) { 352 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 353 #if defined(PETSC_USE_CTABLE) 354 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 355 col = col - 1; 356 #else 357 col = baij->colmap[in[j] / bs] - 1; 358 #endif 359 if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) { 360 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 361 col = in[j]; 362 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 363 B = baij->B; 364 b = (Mat_SeqBAIJ *)B->data; 365 bimax = b->imax; 366 bi = b->i; 367 bilen = b->ilen; 368 bj = b->j; 369 ba = b->a; 370 } else col += in[j] % bs; 371 } else col = in[j]; 372 if (roworiented) value = v[i * n + j]; 373 else value = v[i + j * m]; 374 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 375 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 376 } 377 } 378 } else { /* off processor entry */ 379 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 380 if (!baij->donotstash) { 381 mat->assembled = PETSC_FALSE; 382 n_loc = 0; 383 for (j = 0; j < n; j++) { 384 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 385 in_loc[n_loc] = in[j]; 386 if (roworiented) { 387 v_loc[n_loc] = v[i * n + j]; 388 } else { 389 v_loc[n_loc] = v[j * m + i]; 390 } 391 n_loc++; 392 } 393 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 394 } 395 } 396 } 397 PetscFunctionReturn(PETSC_SUCCESS); 398 } 399 400 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 401 { 402 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 403 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 404 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 405 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 406 PetscBool roworiented = a->roworiented; 407 const PetscScalar *value = v; 408 MatScalar *ap, *aa = a->a, *bap; 409 410 PetscFunctionBegin; 411 if (col < row) { 412 PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 413 PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 414 } 415 rp = aj + ai[row]; 416 ap = aa + bs2 * ai[row]; 417 rmax = imax[row]; 418 nrow = ailen[row]; 419 value = v; 420 low = 0; 421 high = nrow; 422 423 while (high - low > 7) { 424 t = (low + high) / 2; 425 if (rp[t] > col) high = t; 426 else low = t; 427 } 428 for (i = low; i < high; i++) { 429 if (rp[i] > col) break; 430 if (rp[i] == col) { 431 bap = ap + bs2 * i; 432 if (roworiented) { 433 if (is == ADD_VALUES) { 434 for (ii = 0; ii < bs; ii++) { 435 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 436 } 437 } else { 438 for (ii = 0; ii < bs; ii++) { 439 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 440 } 441 } 442 } else { 443 if (is == ADD_VALUES) { 444 for (ii = 0; ii < bs; ii++) { 445 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 446 } 447 } else { 448 for (ii = 0; ii < bs; ii++) { 449 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 450 } 451 } 452 } 453 goto noinsert2; 454 } 455 } 456 if (nonew == 1) goto noinsert2; 457 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 458 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 459 N = nrow++ - 1; 460 high++; 461 /* shift up all the later entries in this row */ 462 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 463 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 464 rp[i] = col; 465 bap = ap + bs2 * i; 466 if (roworiented) { 467 for (ii = 0; ii < bs; ii++) { 468 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 469 } 470 } else { 471 for (ii = 0; ii < bs; ii++) { 472 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 473 } 474 } 475 noinsert2:; 476 ailen[row] = nrow; 477 PetscFunctionReturn(PETSC_SUCCESS); 478 } 479 480 /* 481 This routine is exactly duplicated in mpibaij.c 482 */ 483 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 484 { 485 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 486 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 487 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 488 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 489 PetscBool roworiented = a->roworiented; 490 const PetscScalar *value = v; 491 MatScalar *ap, *aa = a->a, *bap; 492 493 PetscFunctionBegin; 494 rp = aj + ai[row]; 495 ap = aa + bs2 * ai[row]; 496 rmax = imax[row]; 497 nrow = ailen[row]; 498 low = 0; 499 high = nrow; 500 value = v; 501 while (high - low > 7) { 502 t = (low + high) / 2; 503 if (rp[t] > col) high = t; 504 else low = t; 505 } 506 for (i = low; i < high; i++) { 507 if (rp[i] > col) break; 508 if (rp[i] == col) { 509 bap = ap + bs2 * i; 510 if (roworiented) { 511 if (is == ADD_VALUES) { 512 for (ii = 0; ii < bs; ii++) { 513 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 514 } 515 } else { 516 for (ii = 0; ii < bs; ii++) { 517 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 518 } 519 } 520 } else { 521 if (is == ADD_VALUES) { 522 for (ii = 0; ii < bs; ii++, value += bs) { 523 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 524 bap += bs; 525 } 526 } else { 527 for (ii = 0; ii < bs; ii++, value += bs) { 528 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 529 bap += bs; 530 } 531 } 532 } 533 goto noinsert2; 534 } 535 } 536 if (nonew == 1) goto noinsert2; 537 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 538 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 539 N = nrow++ - 1; 540 high++; 541 /* shift up all the later entries in this row */ 542 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 543 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 544 rp[i] = col; 545 bap = ap + bs2 * i; 546 if (roworiented) { 547 for (ii = 0; ii < bs; ii++) { 548 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 549 } 550 } else { 551 for (ii = 0; ii < bs; ii++) { 552 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 553 } 554 } 555 noinsert2:; 556 ailen[row] = nrow; 557 PetscFunctionReturn(PETSC_SUCCESS); 558 } 559 560 /* 561 This routine could be optimized by removing the need for the block copy below and passing stride information 562 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 563 */ 564 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 565 { 566 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 567 const MatScalar *value; 568 MatScalar *barray = baij->barray; 569 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 570 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 571 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 572 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 573 574 PetscFunctionBegin; 575 if (!barray) { 576 PetscCall(PetscMalloc1(bs2, &barray)); 577 baij->barray = barray; 578 } 579 580 if (roworiented) { 581 stepval = (n - 1) * bs; 582 } else { 583 stepval = (m - 1) * bs; 584 } 585 for (i = 0; i < m; i++) { 586 if (im[i] < 0) continue; 587 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 588 if (im[i] >= rstart && im[i] < rend) { 589 row = im[i] - rstart; 590 for (j = 0; j < n; j++) { 591 if (im[i] > in[j]) { 592 PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 593 continue; /* ignore lower triangular blocks */ 594 } 595 /* If NumCol = 1 then a copy is not required */ 596 if (roworiented && n == 1) { 597 barray = (MatScalar *)v + i * bs2; 598 } else if ((!roworiented) && (m == 1)) { 599 barray = (MatScalar *)v + j * bs2; 600 } else { /* Here a copy is required */ 601 if (roworiented) { 602 value = v + i * (stepval + bs) * bs + j * bs; 603 } else { 604 value = v + j * (stepval + bs) * bs + i * bs; 605 } 606 for (ii = 0; ii < bs; ii++, value += stepval) { 607 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 608 } 609 barray -= bs2; 610 } 611 612 if (in[j] >= cstart && in[j] < cend) { 613 col = in[j] - cstart; 614 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 615 } else if (in[j] < 0) { 616 continue; 617 } else { 618 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 619 if (mat->was_assembled) { 620 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 621 622 #if defined(PETSC_USE_CTABLE) 623 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 624 col = col < 1 ? -1 : (col - 1) / bs; 625 #else 626 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 627 #endif 628 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 629 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 630 col = in[j]; 631 } 632 } else col = in[j]; 633 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 634 } 635 } 636 } else { 637 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 638 if (!baij->donotstash) { 639 if (roworiented) { 640 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } else { 642 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 643 } 644 } 645 } 646 } 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 651 { 652 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 653 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 654 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 655 656 PetscFunctionBegin; 657 for (i = 0; i < m; i++) { 658 if (idxm[i] < 0) continue; /* negative row */ 659 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 660 PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 661 row = idxm[i] - bsrstart; 662 for (j = 0; j < n; j++) { 663 if (idxn[j] < 0) continue; /* negative column */ 664 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 665 if (idxn[j] >= bscstart && idxn[j] < bscend) { 666 col = idxn[j] - bscstart; 667 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 668 } else { 669 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 670 #if defined(PETSC_USE_CTABLE) 671 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 672 data--; 673 #else 674 data = baij->colmap[idxn[j] / bs] - 1; 675 #endif 676 if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0; 677 else { 678 col = data + idxn[j] % bs; 679 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 680 } 681 } 682 } 683 } 684 PetscFunctionReturn(PETSC_SUCCESS); 685 } 686 687 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 688 { 689 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 690 PetscReal sum[2], *lnorm2; 691 692 PetscFunctionBegin; 693 if (baij->size == 1) { 694 PetscCall(MatNorm(baij->A, type, norm)); 695 } else { 696 if (type == NORM_FROBENIUS) { 697 PetscCall(PetscMalloc1(2, &lnorm2)); 698 PetscCall(MatNorm(baij->A, type, lnorm2)); 699 *lnorm2 = (*lnorm2) * (*lnorm2); 700 lnorm2++; /* squar power of norm(A) */ 701 PetscCall(MatNorm(baij->B, type, lnorm2)); 702 *lnorm2 = (*lnorm2) * (*lnorm2); 703 lnorm2--; /* squar power of norm(B) */ 704 PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 705 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 706 PetscCall(PetscFree(lnorm2)); 707 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 708 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 709 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 710 PetscReal *rsum, vabs; 711 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 712 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 713 MatScalar *v; 714 715 PetscCall(PetscMalloc1(mat->cmap->N, &rsum)); 716 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 717 /* Amat */ 718 v = amat->a; 719 jj = amat->j; 720 for (brow = 0; brow < mbs; brow++) { 721 grow = bs * (rstart + brow); 722 nz = amat->i[brow + 1] - amat->i[brow]; 723 for (bcol = 0; bcol < nz; bcol++) { 724 gcol = bs * (rstart + *jj); 725 jj++; 726 for (col = 0; col < bs; col++) { 727 for (row = 0; row < bs; row++) { 728 vabs = PetscAbsScalar(*v); 729 v++; 730 rsum[gcol + col] += vabs; 731 /* non-diagonal block */ 732 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 733 } 734 } 735 } 736 PetscCall(PetscLogFlops(nz * bs * bs)); 737 } 738 /* Bmat */ 739 v = bmat->a; 740 jj = bmat->j; 741 for (brow = 0; brow < mbs; brow++) { 742 grow = bs * (rstart + brow); 743 nz = bmat->i[brow + 1] - bmat->i[brow]; 744 for (bcol = 0; bcol < nz; bcol++) { 745 gcol = bs * garray[*jj]; 746 jj++; 747 for (col = 0; col < bs; col++) { 748 for (row = 0; row < bs; row++) { 749 vabs = PetscAbsScalar(*v); 750 v++; 751 rsum[gcol + col] += vabs; 752 rsum[grow + row] += vabs; 753 } 754 } 755 } 756 PetscCall(PetscLogFlops(nz * bs * bs)); 757 } 758 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 759 *norm = 0.0; 760 for (col = 0; col < mat->cmap->N; col++) { 761 if (rsum[col] > *norm) *norm = rsum[col]; 762 } 763 PetscCall(PetscFree(rsum)); 764 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 765 } 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 770 { 771 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 772 PetscInt nstash, reallocs; 773 774 PetscFunctionBegin; 775 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 776 777 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 778 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 779 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 780 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 781 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 782 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 783 PetscFunctionReturn(PETSC_SUCCESS); 784 } 785 786 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 787 { 788 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 789 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 790 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 791 PetscInt *row, *col; 792 PetscBool all_assembled; 793 PetscMPIInt n; 794 PetscBool r1, r2, r3; 795 MatScalar *val; 796 797 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 798 PetscFunctionBegin; 799 if (!baij->donotstash && !mat->nooffprocentries) { 800 while (1) { 801 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 802 if (!flg) break; 803 804 for (i = 0; i < n;) { 805 /* Now identify the consecutive vals belonging to the same row */ 806 for (j = i, rstart = row[j]; j < n; j++) { 807 if (row[j] != rstart) break; 808 } 809 if (j < n) ncols = j - i; 810 else ncols = n - i; 811 /* Now assemble all these values with a single function call */ 812 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 813 i = j; 814 } 815 } 816 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 817 /* Now process the block-stash. Since the values are stashed column-oriented, 818 set the row-oriented flag to column-oriented, and after MatSetValues() 819 restore the original flags */ 820 r1 = baij->roworiented; 821 r2 = a->roworiented; 822 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 823 824 baij->roworiented = PETSC_FALSE; 825 a->roworiented = PETSC_FALSE; 826 827 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */ 828 while (1) { 829 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 830 if (!flg) break; 831 832 for (i = 0; i < n;) { 833 /* Now identify the consecutive vals belonging to the same row */ 834 for (j = i, rstart = row[j]; j < n; j++) { 835 if (row[j] != rstart) break; 836 } 837 if (j < n) ncols = j - i; 838 else ncols = n - i; 839 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 840 i = j; 841 } 842 } 843 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 844 845 baij->roworiented = r1; 846 a->roworiented = r2; 847 848 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */ 849 } 850 851 PetscCall(MatAssemblyBegin(baij->A, mode)); 852 PetscCall(MatAssemblyEnd(baij->A, mode)); 853 854 /* determine if any process has disassembled, if so we must 855 also disassemble ourselves, in order that we may reassemble. */ 856 /* 857 if nonzero structure of submatrix B cannot change then we know that 858 no process disassembled thus we can skip this stuff 859 */ 860 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 861 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 862 if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 863 } 864 865 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 866 PetscCall(MatAssemblyBegin(baij->B, mode)); 867 PetscCall(MatAssemblyEnd(baij->B, mode)); 868 869 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 870 871 baij->rowvalues = NULL; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 875 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 876 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 877 } 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 882 #include <petscdraw.h> 883 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 884 { 885 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 886 PetscInt bs = mat->rmap->bs; 887 PetscMPIInt rank = baij->rank; 888 PetscBool isascii, isdraw; 889 PetscViewer sviewer; 890 PetscViewerFormat format; 891 892 PetscFunctionBegin; 893 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 894 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 895 if (isascii) { 896 PetscCall(PetscViewerGetFormat(viewer, &format)); 897 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 898 MatInfo info; 899 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 900 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 901 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 902 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 903 mat->rmap->bs, info.memory)); 904 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 905 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 906 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 907 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 908 PetscCall(PetscViewerFlush(viewer)); 909 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 910 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 911 PetscCall(VecScatterView(baij->Mvctx, viewer)); 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } else if (format == PETSC_VIEWER_ASCII_INFO) { 914 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 915 PetscFunctionReturn(PETSC_SUCCESS); 916 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 917 PetscFunctionReturn(PETSC_SUCCESS); 918 } 919 } 920 921 if (isdraw) { 922 PetscDraw draw; 923 PetscBool isnull; 924 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 925 PetscCall(PetscDrawIsNull(draw, &isnull)); 926 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 927 } 928 929 { 930 /* assemble the entire matrix onto first processor. */ 931 Mat A; 932 Mat_SeqSBAIJ *Aloc; 933 Mat_SeqBAIJ *Bloc; 934 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 935 MatScalar *a; 936 const char *matname; 937 938 /* Should this be the same type as mat? */ 939 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 940 if (rank == 0) { 941 PetscCall(MatSetSizes(A, M, N, M, N)); 942 } else { 943 PetscCall(MatSetSizes(A, 0, 0, M, N)); 944 } 945 PetscCall(MatSetType(A, MATMPISBAIJ)); 946 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 947 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 948 949 /* copy over the A part */ 950 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 951 ai = Aloc->i; 952 aj = Aloc->j; 953 a = Aloc->a; 954 PetscCall(PetscMalloc1(bs, &rvals)); 955 956 for (i = 0; i < mbs; i++) { 957 rvals[0] = bs * (baij->rstartbs + i); 958 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 959 for (j = ai[i]; j < ai[i + 1]; j++) { 960 col = (baij->cstartbs + aj[j]) * bs; 961 for (k = 0; k < bs; k++) { 962 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 963 col++; 964 a += bs; 965 } 966 } 967 } 968 /* copy over the B part */ 969 Bloc = (Mat_SeqBAIJ *)baij->B->data; 970 ai = Bloc->i; 971 aj = Bloc->j; 972 a = Bloc->a; 973 for (i = 0; i < mbs; i++) { 974 rvals[0] = bs * (baij->rstartbs + i); 975 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 976 for (j = ai[i]; j < ai[i + 1]; j++) { 977 col = baij->garray[aj[j]] * bs; 978 for (k = 0; k < bs; k++) { 979 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 980 col++; 981 a += bs; 982 } 983 } 984 } 985 PetscCall(PetscFree(rvals)); 986 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 987 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 988 /* 989 Everyone has to call to draw the matrix since the graphics waits are 990 synchronized across all processors that share the PetscDraw object 991 */ 992 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 993 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 994 if (rank == 0) { 995 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname)); 996 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer)); 997 } 998 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 999 PetscCall(MatDestroy(&A)); 1000 } 1001 PetscFunctionReturn(PETSC_SUCCESS); 1002 } 1003 1004 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1005 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1006 1007 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1008 { 1009 PetscBool isascii, isdraw, issocket, isbinary; 1010 1011 PetscFunctionBegin; 1012 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1013 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1014 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1015 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1016 if (isascii || isdraw || issocket) { 1017 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 1018 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 1019 PetscFunctionReturn(PETSC_SUCCESS); 1020 } 1021 1022 #if defined(PETSC_USE_COMPLEX) 1023 static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1024 { 1025 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1026 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1027 PetscScalar *from; 1028 const PetscScalar *x; 1029 1030 PetscFunctionBegin; 1031 /* diagonal part */ 1032 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1033 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1034 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1035 PetscCall(VecZeroEntries(a->slvec1b)); 1036 1037 /* subdiagonal part */ 1038 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1039 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1040 1041 /* copy x into the vec slvec0 */ 1042 PetscCall(VecGetArray(a->slvec0, &from)); 1043 PetscCall(VecGetArrayRead(xx, &x)); 1044 1045 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1046 PetscCall(VecRestoreArray(a->slvec0, &from)); 1047 PetscCall(VecRestoreArrayRead(xx, &x)); 1048 1049 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1050 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1051 /* supperdiagonal part */ 1052 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1053 PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 #endif 1056 1057 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1058 { 1059 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1060 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1061 PetscScalar *from; 1062 const PetscScalar *x; 1063 1064 PetscFunctionBegin; 1065 /* diagonal part */ 1066 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1067 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1068 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1069 PetscCall(VecZeroEntries(a->slvec1b)); 1070 1071 /* subdiagonal part */ 1072 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1073 1074 /* copy x into the vec slvec0 */ 1075 PetscCall(VecGetArray(a->slvec0, &from)); 1076 PetscCall(VecGetArrayRead(xx, &x)); 1077 1078 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1079 PetscCall(VecRestoreArray(a->slvec0, &from)); 1080 PetscCall(VecRestoreArrayRead(xx, &x)); 1081 1082 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1083 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1084 /* supperdiagonal part */ 1085 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 #if PetscDefined(USE_COMPLEX) 1090 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1091 { 1092 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1093 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1094 PetscScalar *from; 1095 const PetscScalar *x; 1096 1097 PetscFunctionBegin; 1098 /* diagonal part */ 1099 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1100 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1101 PetscCall(VecZeroEntries(a->slvec1b)); 1102 1103 /* subdiagonal part */ 1104 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1105 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1106 1107 /* copy x into the vec slvec0 */ 1108 PetscCall(VecGetArray(a->slvec0, &from)); 1109 PetscCall(VecGetArrayRead(xx, &x)); 1110 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1111 PetscCall(VecRestoreArray(a->slvec0, &from)); 1112 1113 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1114 PetscCall(VecRestoreArrayRead(xx, &x)); 1115 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1116 1117 /* supperdiagonal part */ 1118 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1119 PetscFunctionReturn(PETSC_SUCCESS); 1120 } 1121 #endif 1122 1123 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1124 { 1125 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1126 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1127 PetscScalar *from; 1128 const PetscScalar *x; 1129 1130 PetscFunctionBegin; 1131 /* diagonal part */ 1132 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1133 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1134 PetscCall(VecZeroEntries(a->slvec1b)); 1135 1136 /* subdiagonal part */ 1137 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1138 1139 /* copy x into the vec slvec0 */ 1140 PetscCall(VecGetArray(a->slvec0, &from)); 1141 PetscCall(VecGetArrayRead(xx, &x)); 1142 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1143 PetscCall(VecRestoreArray(a->slvec0, &from)); 1144 1145 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1146 PetscCall(VecRestoreArrayRead(xx, &x)); 1147 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1148 1149 /* supperdiagonal part */ 1150 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1151 PetscFunctionReturn(PETSC_SUCCESS); 1152 } 1153 1154 /* 1155 This only works correctly for square matrices where the subblock A->A is the 1156 diagonal block 1157 */ 1158 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1159 { 1160 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1161 1162 PetscFunctionBegin; 1163 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1164 PetscCall(MatGetDiagonal(a->A, v)); 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1169 { 1170 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1171 1172 PetscFunctionBegin; 1173 PetscCall(MatScale(a->A, aa)); 1174 PetscCall(MatScale(a->B, aa)); 1175 PetscFunctionReturn(PETSC_SUCCESS); 1176 } 1177 1178 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1179 { 1180 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1181 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1182 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1183 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1184 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1185 1186 PetscFunctionBegin; 1187 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1188 mat->getrowactive = PETSC_TRUE; 1189 1190 if (!mat->rowvalues && (idx || v)) { 1191 /* 1192 allocate enough space to hold information from the longest row. 1193 */ 1194 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1195 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1196 PetscInt max = 1, mbs = mat->mbs, tmp; 1197 for (i = 0; i < mbs; i++) { 1198 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1199 if (max < tmp) max = tmp; 1200 } 1201 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1202 } 1203 1204 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1205 lrow = row - brstart; /* local row index */ 1206 1207 pvA = &vworkA; 1208 pcA = &cworkA; 1209 pvB = &vworkB; 1210 pcB = &cworkB; 1211 if (!v) { 1212 pvA = NULL; 1213 pvB = NULL; 1214 } 1215 if (!idx) { 1216 pcA = NULL; 1217 if (!v) pcB = NULL; 1218 } 1219 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1220 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1221 nztot = nzA + nzB; 1222 1223 cmap = mat->garray; 1224 if (v || idx) { 1225 if (nztot) { 1226 /* Sort by increasing column numbers, assuming A and B already sorted */ 1227 PetscInt imark = -1; 1228 if (v) { 1229 *v = v_p = mat->rowvalues; 1230 for (i = 0; i < nzB; i++) { 1231 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1232 else break; 1233 } 1234 imark = i; 1235 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1236 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1237 } 1238 if (idx) { 1239 *idx = idx_p = mat->rowindices; 1240 if (imark > -1) { 1241 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1242 } else { 1243 for (i = 0; i < nzB; i++) { 1244 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1245 else break; 1246 } 1247 imark = i; 1248 } 1249 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1250 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1251 } 1252 } else { 1253 if (idx) *idx = NULL; 1254 if (v) *v = NULL; 1255 } 1256 } 1257 *nz = nztot; 1258 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1259 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1260 PetscFunctionReturn(PETSC_SUCCESS); 1261 } 1262 1263 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1264 { 1265 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1266 1267 PetscFunctionBegin; 1268 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1269 baij->getrowactive = PETSC_FALSE; 1270 PetscFunctionReturn(PETSC_SUCCESS); 1271 } 1272 1273 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1274 { 1275 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1276 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1277 1278 PetscFunctionBegin; 1279 aA->getrow_utriangular = PETSC_TRUE; 1280 PetscFunctionReturn(PETSC_SUCCESS); 1281 } 1282 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1283 { 1284 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1285 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1286 1287 PetscFunctionBegin; 1288 aA->getrow_utriangular = PETSC_FALSE; 1289 PetscFunctionReturn(PETSC_SUCCESS); 1290 } 1291 1292 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1293 { 1294 PetscFunctionBegin; 1295 if (PetscDefined(USE_COMPLEX)) { 1296 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1297 1298 PetscCall(MatConjugate(a->A)); 1299 PetscCall(MatConjugate(a->B)); 1300 } 1301 PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 1304 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1305 { 1306 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1307 1308 PetscFunctionBegin; 1309 PetscCall(MatRealPart(a->A)); 1310 PetscCall(MatRealPart(a->B)); 1311 PetscFunctionReturn(PETSC_SUCCESS); 1312 } 1313 1314 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1315 { 1316 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1317 1318 PetscFunctionBegin; 1319 PetscCall(MatImaginaryPart(a->A)); 1320 PetscCall(MatImaginaryPart(a->B)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } 1323 1324 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1325 Input: isrow - distributed(parallel), 1326 iscol_local - locally owned (seq) 1327 */ 1328 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1329 { 1330 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1331 const PetscInt *ptr1, *ptr2; 1332 1333 PetscFunctionBegin; 1334 *flg = PETSC_FALSE; 1335 PetscCall(ISGetLocalSize(isrow, &sz1)); 1336 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1337 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); 1338 1339 PetscCall(ISGetIndices(isrow, &ptr1)); 1340 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1341 1342 PetscCall(PetscMalloc1(sz1, &a1)); 1343 PetscCall(PetscMalloc1(sz2, &a2)); 1344 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1345 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1346 PetscCall(PetscSortInt(sz1, a1)); 1347 PetscCall(PetscSortInt(sz2, a2)); 1348 1349 nmatch = 0; 1350 k = 0; 1351 for (i = 0; i < sz1; i++) { 1352 for (j = k; j < sz2; j++) { 1353 if (a1[i] == a2[j]) { 1354 k = j; 1355 nmatch++; 1356 break; 1357 } 1358 } 1359 } 1360 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1361 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1362 PetscCall(PetscFree(a1)); 1363 PetscCall(PetscFree(a2)); 1364 if (nmatch < sz1) { 1365 *flg = PETSC_FALSE; 1366 } else { 1367 *flg = PETSC_TRUE; 1368 } 1369 PetscFunctionReturn(PETSC_SUCCESS); 1370 } 1371 1372 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1373 { 1374 Mat C[2]; 1375 IS iscol_local, isrow_local; 1376 PetscInt csize, csize_local, rsize; 1377 PetscBool isequal, issorted, isidentity = PETSC_FALSE; 1378 1379 PetscFunctionBegin; 1380 PetscCall(ISGetLocalSize(iscol, &csize)); 1381 PetscCall(ISGetLocalSize(isrow, &rsize)); 1382 if (call == MAT_REUSE_MATRIX) { 1383 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1384 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1385 } else { 1386 PetscCall(ISAllGather(iscol, &iscol_local)); 1387 PetscCall(ISSorted(iscol_local, &issorted)); 1388 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted"); 1389 } 1390 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1391 if (!isequal) { 1392 PetscCall(ISGetLocalSize(iscol_local, &csize_local)); 1393 isidentity = (PetscBool)(mat->cmap->N == csize_local); 1394 if (!isidentity) { 1395 if (call == MAT_REUSE_MATRIX) { 1396 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local)); 1397 PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1398 } else { 1399 PetscCall(ISAllGather(isrow, &isrow_local)); 1400 PetscCall(ISSorted(isrow_local, &issorted)); 1401 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted"); 1402 } 1403 } 1404 } 1405 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1406 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity))); 1407 if (!isequal && !isidentity) { 1408 if (call == MAT_INITIAL_MATRIX) { 1409 IS intersect; 1410 PetscInt ni; 1411 1412 PetscCall(ISIntersect(isrow_local, iscol_local, &intersect)); 1413 PetscCall(ISGetLocalSize(intersect, &ni)); 1414 PetscCall(ISDestroy(&intersect)); 1415 PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni); 1416 } 1417 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE)); 1418 PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1)); 1419 PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN)); 1420 if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1421 else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat)); 1422 else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1423 PetscCall(MatDestroy(C)); 1424 PetscCall(MatDestroy(C + 1)); 1425 } 1426 if (call == MAT_INITIAL_MATRIX) { 1427 if (!isequal && !isidentity) { 1428 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local)); 1429 PetscCall(ISDestroy(&isrow_local)); 1430 } 1431 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1432 PetscCall(ISDestroy(&iscol_local)); 1433 } 1434 PetscFunctionReturn(PETSC_SUCCESS); 1435 } 1436 1437 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1438 { 1439 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1440 1441 PetscFunctionBegin; 1442 PetscCall(MatZeroEntries(l->A)); 1443 PetscCall(MatZeroEntries(l->B)); 1444 PetscFunctionReturn(PETSC_SUCCESS); 1445 } 1446 1447 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1448 { 1449 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1450 Mat A = a->A, B = a->B; 1451 PetscLogDouble isend[5], irecv[5]; 1452 1453 PetscFunctionBegin; 1454 info->block_size = (PetscReal)matin->rmap->bs; 1455 1456 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1457 1458 isend[0] = info->nz_used; 1459 isend[1] = info->nz_allocated; 1460 isend[2] = info->nz_unneeded; 1461 isend[3] = info->memory; 1462 isend[4] = info->mallocs; 1463 1464 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1465 1466 isend[0] += info->nz_used; 1467 isend[1] += info->nz_allocated; 1468 isend[2] += info->nz_unneeded; 1469 isend[3] += info->memory; 1470 isend[4] += info->mallocs; 1471 if (flag == MAT_LOCAL) { 1472 info->nz_used = isend[0]; 1473 info->nz_allocated = isend[1]; 1474 info->nz_unneeded = isend[2]; 1475 info->memory = isend[3]; 1476 info->mallocs = isend[4]; 1477 } else if (flag == MAT_GLOBAL_MAX) { 1478 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1479 1480 info->nz_used = irecv[0]; 1481 info->nz_allocated = irecv[1]; 1482 info->nz_unneeded = irecv[2]; 1483 info->memory = irecv[3]; 1484 info->mallocs = irecv[4]; 1485 } else if (flag == MAT_GLOBAL_SUM) { 1486 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1487 1488 info->nz_used = irecv[0]; 1489 info->nz_allocated = irecv[1]; 1490 info->nz_unneeded = irecv[2]; 1491 info->memory = irecv[3]; 1492 info->mallocs = irecv[4]; 1493 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1494 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1495 info->fill_ratio_needed = 0; 1496 info->factor_mallocs = 0; 1497 PetscFunctionReturn(PETSC_SUCCESS); 1498 } 1499 1500 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1501 { 1502 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1503 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1504 1505 PetscFunctionBegin; 1506 switch (op) { 1507 case MAT_NEW_NONZERO_LOCATIONS: 1508 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1509 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1510 case MAT_KEEP_NONZERO_PATTERN: 1511 case MAT_NEW_NONZERO_LOCATION_ERR: 1512 MatCheckPreallocated(A, 1); 1513 PetscCall(MatSetOption(a->A, op, flg)); 1514 PetscCall(MatSetOption(a->B, op, flg)); 1515 break; 1516 case MAT_ROW_ORIENTED: 1517 MatCheckPreallocated(A, 1); 1518 a->roworiented = flg; 1519 1520 PetscCall(MatSetOption(a->A, op, flg)); 1521 PetscCall(MatSetOption(a->B, op, flg)); 1522 break; 1523 case MAT_IGNORE_OFF_PROC_ENTRIES: 1524 a->donotstash = flg; 1525 break; 1526 case MAT_USE_HASH_TABLE: 1527 a->ht_flag = flg; 1528 break; 1529 case MAT_HERMITIAN: 1530 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1531 #if defined(PETSC_USE_COMPLEX) 1532 if (flg) { /* need different mat-vec ops */ 1533 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1534 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1535 A->ops->multtranspose = NULL; 1536 A->ops->multtransposeadd = NULL; 1537 A->symmetric = PETSC_BOOL3_FALSE; 1538 } 1539 #endif 1540 break; 1541 case MAT_SPD: 1542 case MAT_SYMMETRIC: 1543 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1544 #if defined(PETSC_USE_COMPLEX) 1545 if (flg) { /* restore to use default mat-vec ops */ 1546 A->ops->mult = MatMult_MPISBAIJ; 1547 A->ops->multadd = MatMultAdd_MPISBAIJ; 1548 A->ops->multtranspose = MatMult_MPISBAIJ; 1549 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1550 } 1551 #endif 1552 break; 1553 case MAT_STRUCTURALLY_SYMMETRIC: 1554 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1555 break; 1556 case MAT_IGNORE_LOWER_TRIANGULAR: 1557 case MAT_ERROR_LOWER_TRIANGULAR: 1558 aA->ignore_ltriangular = flg; 1559 break; 1560 case MAT_GETROW_UPPERTRIANGULAR: 1561 aA->getrow_utriangular = flg; 1562 break; 1563 default: 1564 break; 1565 } 1566 PetscFunctionReturn(PETSC_SUCCESS); 1567 } 1568 1569 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1570 { 1571 PetscFunctionBegin; 1572 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1573 if (reuse == MAT_INITIAL_MATRIX) { 1574 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1575 } else if (reuse == MAT_REUSE_MATRIX) { 1576 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1577 } 1578 PetscFunctionReturn(PETSC_SUCCESS); 1579 } 1580 1581 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1582 { 1583 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1584 Mat a = baij->A, b = baij->B; 1585 PetscInt nv, m, n; 1586 PetscBool flg; 1587 1588 PetscFunctionBegin; 1589 if (ll != rr) { 1590 PetscCall(VecEqual(ll, rr, &flg)); 1591 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1592 } 1593 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1594 1595 PetscCall(MatGetLocalSize(mat, &m, &n)); 1596 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1597 1598 PetscCall(VecGetLocalSize(rr, &nv)); 1599 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1600 1601 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1602 1603 /* left diagonalscale the off-diagonal part */ 1604 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1605 1606 /* scale the diagonal part */ 1607 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1608 1609 /* right diagonalscale the off-diagonal part */ 1610 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1611 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1612 PetscFunctionReturn(PETSC_SUCCESS); 1613 } 1614 1615 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1616 { 1617 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1618 1619 PetscFunctionBegin; 1620 PetscCall(MatSetUnfactored(a->A)); 1621 PetscFunctionReturn(PETSC_SUCCESS); 1622 } 1623 1624 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1625 1626 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1627 { 1628 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1629 Mat a, b, c, d; 1630 PetscBool flg; 1631 1632 PetscFunctionBegin; 1633 a = matA->A; 1634 b = matA->B; 1635 c = matB->A; 1636 d = matB->B; 1637 1638 PetscCall(MatEqual(a, c, &flg)); 1639 if (flg) PetscCall(MatEqual(b, d, &flg)); 1640 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1641 PetscFunctionReturn(PETSC_SUCCESS); 1642 } 1643 1644 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1645 { 1646 PetscBool isbaij; 1647 1648 PetscFunctionBegin; 1649 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1650 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1651 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1652 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1653 PetscCall(MatGetRowUpperTriangular(A)); 1654 PetscCall(MatCopy_Basic(A, B, str)); 1655 PetscCall(MatRestoreRowUpperTriangular(A)); 1656 } else { 1657 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1658 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1659 1660 PetscCall(MatCopy(a->A, b->A, str)); 1661 PetscCall(MatCopy(a->B, b->B, str)); 1662 } 1663 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1664 PetscFunctionReturn(PETSC_SUCCESS); 1665 } 1666 1667 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1668 { 1669 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1670 PetscBLASInt bnz, one = 1; 1671 Mat_SeqSBAIJ *xa, *ya; 1672 Mat_SeqBAIJ *xb, *yb; 1673 1674 PetscFunctionBegin; 1675 if (str == SAME_NONZERO_PATTERN) { 1676 PetscScalar alpha = a; 1677 xa = (Mat_SeqSBAIJ *)xx->A->data; 1678 ya = (Mat_SeqSBAIJ *)yy->A->data; 1679 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1680 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1681 xb = (Mat_SeqBAIJ *)xx->B->data; 1682 yb = (Mat_SeqBAIJ *)yy->B->data; 1683 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1684 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1685 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1686 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1687 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1688 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1689 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1690 } else { 1691 Mat B; 1692 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1693 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1694 PetscCall(MatGetRowUpperTriangular(X)); 1695 PetscCall(MatGetRowUpperTriangular(Y)); 1696 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1697 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1698 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1699 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1700 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1701 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1702 PetscCall(MatSetType(B, MATMPISBAIJ)); 1703 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1704 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1705 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1706 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1707 PetscCall(MatHeaderMerge(Y, &B)); 1708 PetscCall(PetscFree(nnz_d)); 1709 PetscCall(PetscFree(nnz_o)); 1710 PetscCall(MatRestoreRowUpperTriangular(X)); 1711 PetscCall(MatRestoreRowUpperTriangular(Y)); 1712 } 1713 PetscFunctionReturn(PETSC_SUCCESS); 1714 } 1715 1716 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1717 { 1718 PetscInt i; 1719 PetscBool flg; 1720 1721 PetscFunctionBegin; 1722 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1723 for (i = 0; i < n; i++) { 1724 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1725 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1726 } 1727 PetscFunctionReturn(PETSC_SUCCESS); 1728 } 1729 1730 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1731 { 1732 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1733 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1734 1735 PetscFunctionBegin; 1736 if (!Y->preallocated) { 1737 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1738 } else if (!aij->nz) { 1739 PetscInt nonew = aij->nonew; 1740 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1741 aij->nonew = nonew; 1742 } 1743 PetscCall(MatShift_Basic(Y, a)); 1744 PetscFunctionReturn(PETSC_SUCCESS); 1745 } 1746 1747 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1748 { 1749 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1750 1751 PetscFunctionBegin; 1752 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1753 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1754 if (d) { 1755 PetscInt rstart; 1756 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1757 *d += rstart / A->rmap->bs; 1758 } 1759 PetscFunctionReturn(PETSC_SUCCESS); 1760 } 1761 1762 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1763 { 1764 PetscFunctionBegin; 1765 *a = ((Mat_MPISBAIJ *)A->data)->A; 1766 PetscFunctionReturn(PETSC_SUCCESS); 1767 } 1768 1769 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1770 { 1771 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1772 1773 PetscFunctionBegin; 1774 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1775 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1776 PetscFunctionReturn(PETSC_SUCCESS); 1777 } 1778 1779 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1780 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1781 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1782 1783 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1784 MatGetRow_MPISBAIJ, 1785 MatRestoreRow_MPISBAIJ, 1786 MatMult_MPISBAIJ, 1787 /* 4*/ MatMultAdd_MPISBAIJ, 1788 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1789 MatMultAdd_MPISBAIJ, 1790 NULL, 1791 NULL, 1792 NULL, 1793 /* 10*/ NULL, 1794 NULL, 1795 NULL, 1796 MatSOR_MPISBAIJ, 1797 MatTranspose_MPISBAIJ, 1798 /* 15*/ MatGetInfo_MPISBAIJ, 1799 MatEqual_MPISBAIJ, 1800 MatGetDiagonal_MPISBAIJ, 1801 MatDiagonalScale_MPISBAIJ, 1802 MatNorm_MPISBAIJ, 1803 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1804 MatAssemblyEnd_MPISBAIJ, 1805 MatSetOption_MPISBAIJ, 1806 MatZeroEntries_MPISBAIJ, 1807 /* 24*/ NULL, 1808 NULL, 1809 NULL, 1810 NULL, 1811 NULL, 1812 /* 29*/ MatSetUp_MPI_Hash, 1813 NULL, 1814 NULL, 1815 MatGetDiagonalBlock_MPISBAIJ, 1816 NULL, 1817 /* 34*/ MatDuplicate_MPISBAIJ, 1818 NULL, 1819 NULL, 1820 NULL, 1821 NULL, 1822 /* 39*/ MatAXPY_MPISBAIJ, 1823 MatCreateSubMatrices_MPISBAIJ, 1824 MatIncreaseOverlap_MPISBAIJ, 1825 MatGetValues_MPISBAIJ, 1826 MatCopy_MPISBAIJ, 1827 /* 44*/ NULL, 1828 MatScale_MPISBAIJ, 1829 MatShift_MPISBAIJ, 1830 NULL, 1831 NULL, 1832 /* 49*/ NULL, 1833 NULL, 1834 NULL, 1835 NULL, 1836 NULL, 1837 /* 54*/ NULL, 1838 NULL, 1839 MatSetUnfactored_MPISBAIJ, 1840 NULL, 1841 MatSetValuesBlocked_MPISBAIJ, 1842 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1843 NULL, 1844 NULL, 1845 NULL, 1846 NULL, 1847 /* 64*/ NULL, 1848 NULL, 1849 NULL, 1850 NULL, 1851 MatGetRowMaxAbs_MPISBAIJ, 1852 /* 69*/ NULL, 1853 MatConvert_MPISBAIJ_Basic, 1854 NULL, 1855 NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 NULL, 1860 NULL, 1861 MatLoad_MPISBAIJ, 1862 /* 79*/ NULL, 1863 NULL, 1864 NULL, 1865 NULL, 1866 NULL, 1867 /* 84*/ NULL, 1868 NULL, 1869 NULL, 1870 NULL, 1871 NULL, 1872 /* 89*/ NULL, 1873 NULL, 1874 NULL, 1875 NULL, 1876 MatConjugate_MPISBAIJ, 1877 /* 94*/ NULL, 1878 NULL, 1879 MatRealPart_MPISBAIJ, 1880 MatImaginaryPart_MPISBAIJ, 1881 MatGetRowUpperTriangular_MPISBAIJ, 1882 /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ, 1883 NULL, 1884 NULL, 1885 NULL, 1886 NULL, 1887 /*104*/ MatMissingDiagonal_MPISBAIJ, 1888 NULL, 1889 NULL, 1890 NULL, 1891 NULL, 1892 /*109*/ NULL, 1893 NULL, 1894 NULL, 1895 NULL, 1896 NULL, 1897 /*114*/ NULL, 1898 NULL, 1899 NULL, 1900 NULL, 1901 NULL, 1902 /*119*/ NULL, 1903 NULL, 1904 NULL, 1905 NULL, 1906 NULL, 1907 /*124*/ NULL, 1908 NULL, 1909 NULL, 1910 MatSetBlockSizes_Default, 1911 NULL, 1912 /*129*/ NULL, 1913 NULL, 1914 MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1915 NULL, 1916 NULL, 1917 /*134*/ NULL, 1918 NULL, 1919 NULL, 1920 MatEliminateZeros_MPISBAIJ, 1921 NULL, 1922 /*139*/ NULL, 1923 NULL, 1924 NULL, 1925 MatCopyHashToXAIJ_MPI_Hash, 1926 NULL}; 1927 1928 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1929 { 1930 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1931 PetscInt i, mbs, Mbs; 1932 PetscMPIInt size; 1933 1934 PetscFunctionBegin; 1935 if (B->hash_active) { 1936 B->ops[0] = b->cops; 1937 B->hash_active = PETSC_FALSE; 1938 } 1939 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1940 PetscCall(MatSetBlockSize(B, bs)); 1941 PetscCall(PetscLayoutSetUp(B->rmap)); 1942 PetscCall(PetscLayoutSetUp(B->cmap)); 1943 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1944 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1945 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1946 1947 mbs = B->rmap->n / bs; 1948 Mbs = B->rmap->N / bs; 1949 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1950 1951 B->rmap->bs = bs; 1952 b->bs2 = bs * bs; 1953 b->mbs = mbs; 1954 b->Mbs = Mbs; 1955 b->nbs = B->cmap->n / bs; 1956 b->Nbs = B->cmap->N / bs; 1957 1958 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1959 b->rstartbs = B->rmap->rstart / bs; 1960 b->rendbs = B->rmap->rend / bs; 1961 1962 b->cstartbs = B->cmap->rstart / bs; 1963 b->cendbs = B->cmap->rend / bs; 1964 1965 #if defined(PETSC_USE_CTABLE) 1966 PetscCall(PetscHMapIDestroy(&b->colmap)); 1967 #else 1968 PetscCall(PetscFree(b->colmap)); 1969 #endif 1970 PetscCall(PetscFree(b->garray)); 1971 PetscCall(VecDestroy(&b->lvec)); 1972 PetscCall(VecScatterDestroy(&b->Mvctx)); 1973 PetscCall(VecDestroy(&b->slvec0)); 1974 PetscCall(VecDestroy(&b->slvec0b)); 1975 PetscCall(VecDestroy(&b->slvec1)); 1976 PetscCall(VecDestroy(&b->slvec1a)); 1977 PetscCall(VecDestroy(&b->slvec1b)); 1978 PetscCall(VecScatterDestroy(&b->sMvctx)); 1979 1980 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1981 1982 MatSeqXAIJGetOptions_Private(b->B); 1983 PetscCall(MatDestroy(&b->B)); 1984 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1985 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1986 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1987 MatSeqXAIJRestoreOptions_Private(b->B); 1988 1989 MatSeqXAIJGetOptions_Private(b->A); 1990 PetscCall(MatDestroy(&b->A)); 1991 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1992 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1993 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1994 MatSeqXAIJRestoreOptions_Private(b->A); 1995 1996 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1997 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1998 1999 B->preallocated = PETSC_TRUE; 2000 B->was_assembled = PETSC_FALSE; 2001 B->assembled = PETSC_FALSE; 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2006 { 2007 PetscInt m, rstart, cend; 2008 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2009 const PetscInt *JJ = NULL; 2010 PetscScalar *values = NULL; 2011 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2012 PetscBool nooffprocentries; 2013 2014 PetscFunctionBegin; 2015 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2016 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2017 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2018 PetscCall(PetscLayoutSetUp(B->rmap)); 2019 PetscCall(PetscLayoutSetUp(B->cmap)); 2020 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2021 m = B->rmap->n / bs; 2022 rstart = B->rmap->rstart / bs; 2023 cend = B->cmap->rend / bs; 2024 2025 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2026 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2027 for (i = 0; i < m; i++) { 2028 nz = ii[i + 1] - ii[i]; 2029 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2030 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2031 JJ = jj + ii[i]; 2032 bd = 0; 2033 for (j = 0; j < nz; j++) { 2034 if (*JJ >= i + rstart) break; 2035 JJ++; 2036 bd++; 2037 } 2038 d = 0; 2039 for (; j < nz; j++) { 2040 if (*JJ++ >= cend) break; 2041 d++; 2042 } 2043 d_nnz[i] = d; 2044 o_nnz[i] = nz - d - bd; 2045 nz = nz - bd; 2046 nz_max = PetscMax(nz_max, nz); 2047 } 2048 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2049 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2050 PetscCall(PetscFree2(d_nnz, o_nnz)); 2051 2052 values = (PetscScalar *)V; 2053 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2054 for (i = 0; i < m; i++) { 2055 PetscInt row = i + rstart; 2056 PetscInt ncols = ii[i + 1] - ii[i]; 2057 const PetscInt *icols = jj + ii[i]; 2058 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2059 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2060 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2061 } else { /* block ordering does not match so we can only insert one block at a time. */ 2062 PetscInt j; 2063 for (j = 0; j < ncols; j++) { 2064 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2065 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2066 } 2067 } 2068 } 2069 2070 if (!V) PetscCall(PetscFree(values)); 2071 nooffprocentries = B->nooffprocentries; 2072 B->nooffprocentries = PETSC_TRUE; 2073 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2074 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2075 B->nooffprocentries = nooffprocentries; 2076 2077 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /*MC 2082 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2083 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2084 the matrix is stored. 2085 2086 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2087 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2088 2089 Options Database Key: 2090 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2091 2092 Level: beginner 2093 2094 Note: 2095 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2096 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2097 2098 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2099 M*/ 2100 2101 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2102 { 2103 Mat_MPISBAIJ *b; 2104 PetscBool flg = PETSC_FALSE; 2105 2106 PetscFunctionBegin; 2107 PetscCall(PetscNew(&b)); 2108 B->data = (void *)b; 2109 B->ops[0] = MatOps_Values; 2110 2111 B->ops->destroy = MatDestroy_MPISBAIJ; 2112 B->ops->view = MatView_MPISBAIJ; 2113 B->assembled = PETSC_FALSE; 2114 B->insertmode = NOT_SET_VALUES; 2115 2116 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2117 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2118 2119 /* build local table of row and column ownerships */ 2120 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2121 2122 /* build cache for off array entries formed */ 2123 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2124 2125 b->donotstash = PETSC_FALSE; 2126 b->colmap = NULL; 2127 b->garray = NULL; 2128 b->roworiented = PETSC_TRUE; 2129 2130 /* stuff used in block assembly */ 2131 b->barray = NULL; 2132 2133 /* stuff used for matrix vector multiply */ 2134 b->lvec = NULL; 2135 b->Mvctx = NULL; 2136 b->slvec0 = NULL; 2137 b->slvec0b = NULL; 2138 b->slvec1 = NULL; 2139 b->slvec1a = NULL; 2140 b->slvec1b = NULL; 2141 b->sMvctx = NULL; 2142 2143 /* stuff for MatGetRow() */ 2144 b->rowindices = NULL; 2145 b->rowvalues = NULL; 2146 b->getrowactive = PETSC_FALSE; 2147 2148 /* hash table stuff */ 2149 b->ht = NULL; 2150 b->hd = NULL; 2151 b->ht_size = 0; 2152 b->ht_flag = PETSC_FALSE; 2153 b->ht_fact = 0; 2154 b->ht_total_ct = 0; 2155 b->ht_insert_ct = 0; 2156 2157 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2158 b->ijonly = PETSC_FALSE; 2159 2160 b->in_loc = NULL; 2161 b->v_loc = NULL; 2162 b->n_loc = 0; 2163 2164 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2165 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2166 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2167 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2168 #if defined(PETSC_HAVE_ELEMENTAL) 2169 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2170 #endif 2171 #if defined(PETSC_HAVE_SCALAPACK) 2172 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2173 #endif 2174 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2175 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2176 2177 B->symmetric = PETSC_BOOL3_TRUE; 2178 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2179 B->symmetry_eternal = PETSC_TRUE; 2180 B->structural_symmetry_eternal = PETSC_TRUE; 2181 #if defined(PETSC_USE_COMPLEX) 2182 B->hermitian = PETSC_BOOL3_FALSE; 2183 #else 2184 B->hermitian = PETSC_BOOL3_TRUE; 2185 #endif 2186 2187 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2188 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2189 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2190 if (flg) { 2191 PetscReal fact = 1.39; 2192 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2193 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2194 if (fact <= 1.0) fact = 1.39; 2195 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2196 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2197 } 2198 PetscOptionsEnd(); 2199 PetscFunctionReturn(PETSC_SUCCESS); 2200 } 2201 2202 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2203 /*MC 2204 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2205 2206 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2207 and `MATMPISBAIJ` otherwise. 2208 2209 Options Database Key: 2210 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2211 2212 Level: beginner 2213 2214 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2215 M*/ 2216 2217 /*@ 2218 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2219 the user should preallocate the matrix storage by setting the parameters 2220 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2221 performance can be increased by more than a factor of 50. 2222 2223 Collective 2224 2225 Input Parameters: 2226 + B - the matrix 2227 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2228 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2229 . d_nz - number of block nonzeros per block row in diagonal portion of local 2230 submatrix (same for all local rows) 2231 . d_nnz - array containing the number of block nonzeros in the various block rows 2232 in the upper triangular and diagonal part of the in diagonal portion of the local 2233 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2234 for the diagonal entry and set a value even if it is zero. 2235 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2236 submatrix (same for all local rows). 2237 - o_nnz - array containing the number of nonzeros in the various block rows of the 2238 off-diagonal portion of the local submatrix that is right of the diagonal 2239 (possibly different for each block row) or `NULL`. 2240 2241 Options Database Keys: 2242 + -mat_no_unroll - uses code that does not unroll the loops in the 2243 block calculations (much slower) 2244 - -mat_block_size - size of the blocks to use 2245 2246 Level: intermediate 2247 2248 Notes: 2249 2250 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2251 than it must be used on all processors that share the object for that argument. 2252 2253 If the *_nnz parameter is given then the *_nz parameter is ignored 2254 2255 Storage Information: 2256 For a square global matrix we define each processor's diagonal portion 2257 to be its local rows and the corresponding columns (a square submatrix); 2258 each processor's off-diagonal portion encompasses the remainder of the 2259 local matrix (a rectangular submatrix). 2260 2261 The user can specify preallocated storage for the diagonal part of 2262 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2263 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2264 memory allocation. Likewise, specify preallocated storage for the 2265 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2266 2267 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2268 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2269 You can also run with the option `-info` and look for messages with the string 2270 malloc in them to see if additional memory allocation was needed. 2271 2272 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2273 the figure below we depict these three local rows and all columns (0-11). 2274 2275 .vb 2276 0 1 2 3 4 5 6 7 8 9 10 11 2277 -------------------------- 2278 row 3 |. . . d d d o o o o o o 2279 row 4 |. . . d d d o o o o o o 2280 row 5 |. . . d d d o o o o o o 2281 -------------------------- 2282 .ve 2283 2284 Thus, any entries in the d locations are stored in the d (diagonal) 2285 submatrix, and any entries in the o locations are stored in the 2286 o (off-diagonal) submatrix. Note that the d matrix is stored in 2287 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2288 2289 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2290 plus the diagonal part of the d matrix, 2291 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2292 2293 In general, for PDE problems in which most nonzeros are near the diagonal, 2294 one expects `d_nz` >> `o_nz`. 2295 2296 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2297 @*/ 2298 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2299 { 2300 PetscFunctionBegin; 2301 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2302 PetscValidType(B, 1); 2303 PetscValidLogicalCollectiveInt(B, bs, 2); 2304 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2305 PetscFunctionReturn(PETSC_SUCCESS); 2306 } 2307 2308 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2309 /*@ 2310 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2311 (block compressed row). For good matrix assembly performance 2312 the user should preallocate the matrix storage by setting the parameters 2313 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2314 2315 Collective 2316 2317 Input Parameters: 2318 + comm - MPI communicator 2319 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2320 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2321 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2322 This value should be the same as the local size used in creating the 2323 y vector for the matrix-vector product y = Ax. 2324 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2325 This value should be the same as the local size used in creating the 2326 x vector for the matrix-vector product y = Ax. 2327 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2328 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2329 . d_nz - number of block nonzeros per block row in diagonal portion of local 2330 submatrix (same for all local rows) 2331 . d_nnz - array containing the number of block nonzeros in the various block rows 2332 in the upper triangular portion of the in diagonal portion of the local 2333 (possibly different for each block block row) or `NULL`. 2334 If you plan to factor the matrix you must leave room for the diagonal entry and 2335 set its value even if it is zero. 2336 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2337 submatrix (same for all local rows). 2338 - o_nnz - array containing the number of nonzeros in the various block rows of the 2339 off-diagonal portion of the local submatrix (possibly different for 2340 each block row) or `NULL`. 2341 2342 Output Parameter: 2343 . A - the matrix 2344 2345 Options Database Keys: 2346 + -mat_no_unroll - uses code that does not unroll the loops in the 2347 block calculations (much slower) 2348 . -mat_block_size - size of the blocks to use 2349 - -mat_mpi - use the parallel matrix data structures even on one processor 2350 (defaults to using SeqBAIJ format on one processor) 2351 2352 Level: intermediate 2353 2354 Notes: 2355 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2356 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2357 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2358 2359 The number of rows and columns must be divisible by blocksize. 2360 This matrix type does not support complex Hermitian operation. 2361 2362 The user MUST specify either the local or global matrix dimensions 2363 (possibly both). 2364 2365 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2366 than it must be used on all processors that share the object for that argument. 2367 2368 If `m` and `n` are not `PETSC_DECIDE`, then the values determines the `PetscLayout` of the matrix and the ranges returned by 2369 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2370 2371 If the *_nnz parameter is given then the *_nz parameter is ignored 2372 2373 Storage Information: 2374 For a square global matrix we define each processor's diagonal portion 2375 to be its local rows and the corresponding columns (a square submatrix); 2376 each processor's off-diagonal portion encompasses the remainder of the 2377 local matrix (a rectangular submatrix). 2378 2379 The user can specify preallocated storage for the diagonal part of 2380 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2381 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2382 memory allocation. Likewise, specify preallocated storage for the 2383 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2384 2385 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2386 the figure below we depict these three local rows and all columns (0-11). 2387 2388 .vb 2389 0 1 2 3 4 5 6 7 8 9 10 11 2390 -------------------------- 2391 row 3 |. . . d d d o o o o o o 2392 row 4 |. . . d d d o o o o o o 2393 row 5 |. . . d d d o o o o o o 2394 -------------------------- 2395 .ve 2396 2397 Thus, any entries in the d locations are stored in the d (diagonal) 2398 submatrix, and any entries in the o locations are stored in the 2399 o (off-diagonal) submatrix. Note that the d matrix is stored in 2400 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2401 2402 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2403 plus the diagonal part of the d matrix, 2404 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2405 In general, for PDE problems in which most nonzeros are near the diagonal, 2406 one expects `d_nz` >> `o_nz`. 2407 2408 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2409 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2410 @*/ 2411 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2412 { 2413 PetscMPIInt size; 2414 2415 PetscFunctionBegin; 2416 PetscCall(MatCreate(comm, A)); 2417 PetscCall(MatSetSizes(*A, m, n, M, N)); 2418 PetscCallMPI(MPI_Comm_size(comm, &size)); 2419 if (size > 1) { 2420 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2421 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2422 } else { 2423 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2424 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2425 } 2426 PetscFunctionReturn(PETSC_SUCCESS); 2427 } 2428 2429 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2430 { 2431 Mat mat; 2432 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2433 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2434 PetscScalar *array; 2435 2436 PetscFunctionBegin; 2437 *newmat = NULL; 2438 2439 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2440 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2441 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2442 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2443 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2444 2445 if (matin->hash_active) { 2446 PetscCall(MatSetUp(mat)); 2447 } else { 2448 mat->factortype = matin->factortype; 2449 mat->preallocated = PETSC_TRUE; 2450 mat->assembled = PETSC_TRUE; 2451 mat->insertmode = NOT_SET_VALUES; 2452 2453 a = (Mat_MPISBAIJ *)mat->data; 2454 a->bs2 = oldmat->bs2; 2455 a->mbs = oldmat->mbs; 2456 a->nbs = oldmat->nbs; 2457 a->Mbs = oldmat->Mbs; 2458 a->Nbs = oldmat->Nbs; 2459 2460 a->size = oldmat->size; 2461 a->rank = oldmat->rank; 2462 a->donotstash = oldmat->donotstash; 2463 a->roworiented = oldmat->roworiented; 2464 a->rowindices = NULL; 2465 a->rowvalues = NULL; 2466 a->getrowactive = PETSC_FALSE; 2467 a->barray = NULL; 2468 a->rstartbs = oldmat->rstartbs; 2469 a->rendbs = oldmat->rendbs; 2470 a->cstartbs = oldmat->cstartbs; 2471 a->cendbs = oldmat->cendbs; 2472 2473 /* hash table stuff */ 2474 a->ht = NULL; 2475 a->hd = NULL; 2476 a->ht_size = 0; 2477 a->ht_flag = oldmat->ht_flag; 2478 a->ht_fact = oldmat->ht_fact; 2479 a->ht_total_ct = 0; 2480 a->ht_insert_ct = 0; 2481 2482 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2483 if (oldmat->colmap) { 2484 #if defined(PETSC_USE_CTABLE) 2485 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2486 #else 2487 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2488 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2489 #endif 2490 } else a->colmap = NULL; 2491 2492 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2493 PetscCall(PetscMalloc1(len, &a->garray)); 2494 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2495 } else a->garray = NULL; 2496 2497 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2498 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2499 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2500 2501 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2502 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2503 2504 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2505 PetscCall(VecGetArray(a->slvec1, &array)); 2506 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2507 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2508 PetscCall(VecRestoreArray(a->slvec1, &array)); 2509 PetscCall(VecGetArray(a->slvec0, &array)); 2510 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2511 PetscCall(VecRestoreArray(a->slvec0, &array)); 2512 2513 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2514 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2515 a->sMvctx = oldmat->sMvctx; 2516 2517 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2518 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2519 } 2520 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2521 *newmat = mat; 2522 PetscFunctionReturn(PETSC_SUCCESS); 2523 } 2524 2525 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2526 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2527 2528 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2529 { 2530 PetscBool isbinary; 2531 2532 PetscFunctionBegin; 2533 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2534 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2535 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2536 PetscFunctionReturn(PETSC_SUCCESS); 2537 } 2538 2539 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2540 { 2541 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2542 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2543 PetscReal atmp; 2544 PetscReal *work, *svalues, *rvalues; 2545 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2546 PetscMPIInt rank, size; 2547 PetscInt *rowners_bs, count, source; 2548 PetscScalar *va; 2549 MatScalar *ba; 2550 MPI_Status stat; 2551 2552 PetscFunctionBegin; 2553 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2554 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2555 PetscCall(VecGetArray(v, &va)); 2556 2557 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2558 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2559 2560 bs = A->rmap->bs; 2561 mbs = a->mbs; 2562 Mbs = a->Mbs; 2563 ba = b->a; 2564 bi = b->i; 2565 bj = b->j; 2566 2567 /* find ownerships */ 2568 rowners_bs = A->rmap->range; 2569 2570 /* each proc creates an array to be distributed */ 2571 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2572 2573 /* row_max for B */ 2574 if (rank != size - 1) { 2575 for (i = 0; i < mbs; i++) { 2576 ncols = bi[1] - bi[0]; 2577 bi++; 2578 brow = bs * i; 2579 for (j = 0; j < ncols; j++) { 2580 bcol = bs * (*bj); 2581 for (kcol = 0; kcol < bs; kcol++) { 2582 col = bcol + kcol; /* local col index */ 2583 col += rowners_bs[rank + 1]; /* global col index */ 2584 for (krow = 0; krow < bs; krow++) { 2585 atmp = PetscAbsScalar(*ba); 2586 ba++; 2587 row = brow + krow; /* local row index */ 2588 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2589 if (work[col] < atmp) work[col] = atmp; 2590 } 2591 } 2592 bj++; 2593 } 2594 } 2595 2596 /* send values to its owners */ 2597 for (PetscMPIInt dest = rank + 1; dest < size; dest++) { 2598 svalues = work + rowners_bs[dest]; 2599 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2600 PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2601 } 2602 } 2603 2604 /* receive values */ 2605 if (rank) { 2606 rvalues = work; 2607 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2608 for (source = 0; source < rank; source++) { 2609 PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2610 /* process values */ 2611 for (i = 0; i < count; i++) { 2612 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2613 } 2614 } 2615 } 2616 2617 PetscCall(VecRestoreArray(v, &va)); 2618 PetscCall(PetscFree(work)); 2619 PetscFunctionReturn(PETSC_SUCCESS); 2620 } 2621 2622 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2623 { 2624 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2625 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2626 PetscScalar *x, *ptr, *from; 2627 Vec bb1; 2628 const PetscScalar *b; 2629 2630 PetscFunctionBegin; 2631 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2632 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2633 2634 if (flag == SOR_APPLY_UPPER) { 2635 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2640 if (flag & SOR_ZERO_INITIAL_GUESS) { 2641 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2642 its--; 2643 } 2644 2645 PetscCall(VecDuplicate(bb, &bb1)); 2646 while (its--) { 2647 /* lower triangular part: slvec0b = - B^T*xx */ 2648 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2649 2650 /* copy xx into slvec0a */ 2651 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2652 PetscCall(VecGetArray(xx, &x)); 2653 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2654 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2655 2656 PetscCall(VecScale(mat->slvec0, -1.0)); 2657 2658 /* copy bb into slvec1a */ 2659 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2660 PetscCall(VecGetArrayRead(bb, &b)); 2661 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2662 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2663 2664 /* set slvec1b = 0 */ 2665 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2666 PetscCall(VecZeroEntries(mat->slvec1b)); 2667 2668 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2669 PetscCall(VecRestoreArray(xx, &x)); 2670 PetscCall(VecRestoreArrayRead(bb, &b)); 2671 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2672 2673 /* upper triangular part: bb1 = bb1 - B*x */ 2674 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2675 2676 /* local diagonal sweep */ 2677 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2678 } 2679 PetscCall(VecDestroy(&bb1)); 2680 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2681 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2682 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2683 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2684 } else if (flag & SOR_EISENSTAT) { 2685 Vec xx1; 2686 PetscBool hasop; 2687 const PetscScalar *diag; 2688 PetscScalar *sl, scale = (omega - 2.0) / omega; 2689 PetscInt i, n; 2690 2691 if (!mat->xx1) { 2692 PetscCall(VecDuplicate(bb, &mat->xx1)); 2693 PetscCall(VecDuplicate(bb, &mat->bb1)); 2694 } 2695 xx1 = mat->xx1; 2696 bb1 = mat->bb1; 2697 2698 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2699 2700 if (!mat->diag) { 2701 /* this is wrong for same matrix with new nonzero values */ 2702 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2703 PetscCall(MatGetDiagonal(matin, mat->diag)); 2704 } 2705 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2706 2707 if (hasop) { 2708 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2709 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2710 } else { 2711 /* 2712 These two lines are replaced by code that may be a bit faster for a good compiler 2713 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2714 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2715 */ 2716 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2717 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2718 PetscCall(VecGetArrayRead(bb, &b)); 2719 PetscCall(VecGetArray(xx, &x)); 2720 PetscCall(VecGetLocalSize(xx, &n)); 2721 if (omega == 1.0) { 2722 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2723 PetscCall(PetscLogFlops(2.0 * n)); 2724 } else { 2725 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2726 PetscCall(PetscLogFlops(3.0 * n)); 2727 } 2728 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2729 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2730 PetscCall(VecRestoreArrayRead(bb, &b)); 2731 PetscCall(VecRestoreArray(xx, &x)); 2732 } 2733 2734 /* multiply off-diagonal portion of matrix */ 2735 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2736 PetscCall(VecZeroEntries(mat->slvec1b)); 2737 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2738 PetscCall(VecGetArray(mat->slvec0, &from)); 2739 PetscCall(VecGetArray(xx, &x)); 2740 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2741 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2742 PetscCall(VecRestoreArray(xx, &x)); 2743 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2744 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2745 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2746 2747 /* local sweep */ 2748 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2749 PetscCall(VecAXPY(xx, 1.0, xx1)); 2750 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2751 PetscFunctionReturn(PETSC_SUCCESS); 2752 } 2753 2754 /*@ 2755 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2756 2757 Collective 2758 2759 Input Parameters: 2760 + comm - MPI communicator 2761 . bs - the block size, only a block size of 1 is supported 2762 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2763 . n - This value should be the same as the local size used in creating the 2764 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2765 calculated if `N` is given) For square matrices `n` is almost always `m`. 2766 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2767 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2768 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2769 . j - column indices 2770 - a - matrix values 2771 2772 Output Parameter: 2773 . mat - the matrix 2774 2775 Level: intermediate 2776 2777 Notes: 2778 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2779 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2780 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2781 2782 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2783 2784 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2785 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2786 @*/ 2787 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2788 { 2789 PetscFunctionBegin; 2790 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2791 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2792 PetscCall(MatCreate(comm, mat)); 2793 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2794 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2795 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2796 PetscFunctionReturn(PETSC_SUCCESS); 2797 } 2798 2799 /*@ 2800 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2801 2802 Collective 2803 2804 Input Parameters: 2805 + B - the matrix 2806 . bs - the block size 2807 . i - the indices into `j` for the start of each local row (indices start with zero) 2808 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2809 - v - optional values in the matrix, pass `NULL` if not provided 2810 2811 Level: advanced 2812 2813 Notes: 2814 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2815 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2816 called this routine. 2817 2818 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2819 and usually the numerical values as well 2820 2821 Any entries passed in that are below the diagonal are ignored 2822 2823 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2824 `MatCreateMPISBAIJWithArrays()` 2825 @*/ 2826 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2827 { 2828 PetscFunctionBegin; 2829 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2830 PetscFunctionReturn(PETSC_SUCCESS); 2831 } 2832 2833 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2834 { 2835 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2836 PetscInt *indx; 2837 PetscScalar *values; 2838 2839 PetscFunctionBegin; 2840 PetscCall(MatGetSize(inmat, &m, &N)); 2841 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2842 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2843 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2844 PetscInt *bindx, rmax = a->rmax, j; 2845 PetscMPIInt rank, size; 2846 2847 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2848 mbs = m / bs; 2849 Nbs = N / cbs; 2850 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2851 nbs = n / cbs; 2852 2853 PetscCall(PetscMalloc1(rmax, &bindx)); 2854 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2855 2856 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2857 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2858 if (rank == size - 1) { 2859 /* Check sum(nbs) = Nbs */ 2860 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2861 } 2862 2863 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2864 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2865 for (i = 0; i < mbs; i++) { 2866 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2867 nnz = nnz / bs; 2868 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2869 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2870 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2871 } 2872 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2873 PetscCall(PetscFree(bindx)); 2874 2875 PetscCall(MatCreate(comm, outmat)); 2876 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2877 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2878 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2879 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2880 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2881 MatPreallocateEnd(dnz, onz); 2882 } 2883 2884 /* numeric phase */ 2885 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2886 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2887 2888 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2889 for (i = 0; i < m; i++) { 2890 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2891 Ii = i + rstart; 2892 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2893 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2894 } 2895 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2896 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2897 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2898 PetscFunctionReturn(PETSC_SUCCESS); 2899 } 2900