1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 7 { 8 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 9 10 PetscFunctionBegin; 11 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 12 PetscCall(MatStashDestroy_Private(&mat->stash)); 13 PetscCall(MatStashDestroy_Private(&mat->bstash)); 14 PetscCall(MatDestroy(&baij->A)); 15 PetscCall(MatDestroy(&baij->B)); 16 #if defined(PETSC_USE_CTABLE) 17 PetscCall(PetscHMapIDestroy(&baij->colmap)); 18 #else 19 PetscCall(PetscFree(baij->colmap)); 20 #endif 21 PetscCall(PetscFree(baij->garray)); 22 PetscCall(VecDestroy(&baij->lvec)); 23 PetscCall(VecScatterDestroy(&baij->Mvctx)); 24 PetscCall(VecDestroy(&baij->slvec0)); 25 PetscCall(VecDestroy(&baij->slvec0b)); 26 PetscCall(VecDestroy(&baij->slvec1)); 27 PetscCall(VecDestroy(&baij->slvec1a)); 28 PetscCall(VecDestroy(&baij->slvec1b)); 29 PetscCall(VecScatterDestroy(&baij->sMvctx)); 30 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 31 PetscCall(PetscFree(baij->barray)); 32 PetscCall(PetscFree(baij->hd)); 33 PetscCall(VecDestroy(&baij->diag)); 34 PetscCall(VecDestroy(&baij->bb1)); 35 PetscCall(VecDestroy(&baij->xx1)); 36 #if defined(PETSC_USE_REAL_MAT_SINGLE) 37 PetscCall(PetscFree(baij->setvaluescopy)); 38 #endif 39 PetscCall(PetscFree(baij->in_loc)); 40 PetscCall(PetscFree(baij->v_loc)); 41 PetscCall(PetscFree(baij->rangebs)); 42 PetscCall(PetscFree(mat->data)); 43 44 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 47 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 49 #if defined(PETSC_HAVE_ELEMENTAL) 50 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 51 #endif 52 #if defined(PETSC_HAVE_SCALAPACK) 53 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 54 #endif 55 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 57 PetscFunctionReturn(PETSC_SUCCESS); 58 } 59 60 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */ 61 #define TYPE SBAIJ 62 #define TYPE_SBAIJ 63 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 64 #undef TYPE 65 #undef TYPE_SBAIJ 66 67 #if defined(PETSC_HAVE_ELEMENTAL) 68 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 69 #endif 70 #if defined(PETSC_HAVE_SCALAPACK) 71 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 72 #endif 73 74 /* This could be moved to matimpl.h */ 75 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 76 { 77 Mat preallocator; 78 PetscInt r, rstart, rend; 79 PetscInt bs, i, m, n, M, N; 80 PetscBool cong = PETSC_TRUE; 81 82 PetscFunctionBegin; 83 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 84 PetscValidLogicalCollectiveInt(B, nm, 2); 85 for (i = 0; i < nm; i++) { 86 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 87 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 88 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 89 } 90 PetscValidLogicalCollectiveBool(B, fill, 5); 91 PetscCall(MatGetBlockSize(B, &bs)); 92 PetscCall(MatGetSize(B, &M, &N)); 93 PetscCall(MatGetLocalSize(B, &m, &n)); 94 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 95 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 96 PetscCall(MatSetBlockSize(preallocator, bs)); 97 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 98 PetscCall(MatSetUp(preallocator)); 99 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 100 for (r = rstart; r < rend; ++r) { 101 PetscInt ncols; 102 const PetscInt *row; 103 const PetscScalar *vals; 104 105 for (i = 0; i < nm; i++) { 106 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 107 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 108 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 109 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 110 } 111 } 112 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 113 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 114 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 115 PetscCall(MatDestroy(&preallocator)); 116 PetscFunctionReturn(PETSC_SUCCESS); 117 } 118 119 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 120 { 121 Mat B; 122 PetscInt r; 123 124 PetscFunctionBegin; 125 if (reuse != MAT_REUSE_MATRIX) { 126 PetscBool symm = PETSC_TRUE, isdense; 127 PetscInt bs; 128 129 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 130 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 131 PetscCall(MatSetType(B, newtype)); 132 PetscCall(MatGetBlockSize(A, &bs)); 133 PetscCall(MatSetBlockSize(B, bs)); 134 PetscCall(PetscLayoutSetUp(B->rmap)); 135 PetscCall(PetscLayoutSetUp(B->cmap)); 136 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 137 if (!isdense) { 138 PetscCall(MatGetRowUpperTriangular(A)); 139 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 140 PetscCall(MatRestoreRowUpperTriangular(A)); 141 } else { 142 PetscCall(MatSetUp(B)); 143 } 144 } else { 145 B = *newmat; 146 PetscCall(MatZeroEntries(B)); 147 } 148 149 PetscCall(MatGetRowUpperTriangular(A)); 150 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 151 PetscInt ncols; 152 const PetscInt *row; 153 const PetscScalar *vals; 154 155 PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 156 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 157 #if defined(PETSC_USE_COMPLEX) 158 if (A->hermitian == PETSC_BOOL3_TRUE) { 159 PetscInt i; 160 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 161 } else { 162 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 163 } 164 #else 165 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 166 #endif 167 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 168 } 169 PetscCall(MatRestoreRowUpperTriangular(A)); 170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 172 173 if (reuse == MAT_INPLACE_MATRIX) { 174 PetscCall(MatHeaderReplace(A, &B)); 175 } else { 176 *newmat = B; 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 182 { 183 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 184 185 PetscFunctionBegin; 186 PetscCall(MatStoreValues(aij->A)); 187 PetscCall(MatStoreValues(aij->B)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 192 { 193 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 194 195 PetscFunctionBegin; 196 PetscCall(MatRetrieveValues(aij->A)); 197 PetscCall(MatRetrieveValues(aij->B)); 198 PetscFunctionReturn(PETSC_SUCCESS); 199 } 200 201 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 202 do { \ 203 brow = row / bs; \ 204 rp = aj + ai[brow]; \ 205 ap = aa + bs2 * ai[brow]; \ 206 rmax = aimax[brow]; \ 207 nrow = ailen[brow]; \ 208 bcol = col / bs; \ 209 ridx = row % bs; \ 210 cidx = col % bs; \ 211 low = 0; \ 212 high = nrow; \ 213 while (high - low > 3) { \ 214 t = (low + high) / 2; \ 215 if (rp[t] > bcol) high = t; \ 216 else low = t; \ 217 } \ 218 for (_i = low; _i < high; _i++) { \ 219 if (rp[_i] > bcol) break; \ 220 if (rp[_i] == bcol) { \ 221 bap = ap + bs2 * _i + bs * cidx + ridx; \ 222 if (addv == ADD_VALUES) *bap += value; \ 223 else *bap = value; \ 224 goto a_noinsert; \ 225 } \ 226 } \ 227 if (a->nonew == 1) goto a_noinsert; \ 228 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 229 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 230 N = nrow++ - 1; \ 231 /* shift up all the later entries in this row */ \ 232 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 233 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 234 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 235 rp[_i] = bcol; \ 236 ap[bs2 * _i + bs * cidx + ridx] = value; \ 237 A->nonzerostate++; \ 238 a_noinsert:; \ 239 ailen[brow] = nrow; \ 240 } while (0) 241 242 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 243 do { \ 244 brow = row / bs; \ 245 rp = bj + bi[brow]; \ 246 ap = ba + bs2 * bi[brow]; \ 247 rmax = bimax[brow]; \ 248 nrow = bilen[brow]; \ 249 bcol = col / bs; \ 250 ridx = row % bs; \ 251 cidx = col % bs; \ 252 low = 0; \ 253 high = nrow; \ 254 while (high - low > 3) { \ 255 t = (low + high) / 2; \ 256 if (rp[t] > bcol) high = t; \ 257 else low = t; \ 258 } \ 259 for (_i = low; _i < high; _i++) { \ 260 if (rp[_i] > bcol) break; \ 261 if (rp[_i] == bcol) { \ 262 bap = ap + bs2 * _i + bs * cidx + ridx; \ 263 if (addv == ADD_VALUES) *bap += value; \ 264 else *bap = value; \ 265 goto b_noinsert; \ 266 } \ 267 } \ 268 if (b->nonew == 1) goto b_noinsert; \ 269 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 270 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 271 N = nrow++ - 1; \ 272 /* shift up all the later entries in this row */ \ 273 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 274 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 275 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 276 rp[_i] = bcol; \ 277 ap[bs2 * _i + bs * cidx + ridx] = value; \ 278 B->nonzerostate++; \ 279 b_noinsert:; \ 280 bilen[brow] = nrow; \ 281 } while (0) 282 283 /* Only add/insert a(i,j) with i<=j (blocks). 284 Any a(i,j) with i>j input by user is ignored or generates an error 285 */ 286 static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 287 { 288 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 289 MatScalar value; 290 PetscBool roworiented = baij->roworiented; 291 PetscInt i, j, row, col; 292 PetscInt rstart_orig = mat->rmap->rstart; 293 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 294 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 295 296 /* Some Variables required in the macro */ 297 Mat A = baij->A; 298 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 299 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 300 MatScalar *aa = a->a; 301 302 Mat B = baij->B; 303 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 304 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 305 MatScalar *ba = b->a; 306 307 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 308 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 309 MatScalar *ap, *bap; 310 311 /* for stash */ 312 PetscInt n_loc, *in_loc = NULL; 313 MatScalar *v_loc = NULL; 314 315 PetscFunctionBegin; 316 if (!baij->donotstash) { 317 if (n > baij->n_loc) { 318 PetscCall(PetscFree(baij->in_loc)); 319 PetscCall(PetscFree(baij->v_loc)); 320 PetscCall(PetscMalloc1(n, &baij->in_loc)); 321 PetscCall(PetscMalloc1(n, &baij->v_loc)); 322 323 baij->n_loc = n; 324 } 325 in_loc = baij->in_loc; 326 v_loc = baij->v_loc; 327 } 328 329 for (i = 0; i < m; i++) { 330 if (im[i] < 0) continue; 331 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 332 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 333 row = im[i] - rstart_orig; /* local row index */ 334 for (j = 0; j < n; j++) { 335 if (im[i] / bs > in[j] / bs) { 336 if (a->ignore_ltriangular) { 337 continue; /* ignore lower triangular blocks */ 338 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 339 } 340 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 341 col = in[j] - cstart_orig; /* local col index */ 342 brow = row / bs; 343 bcol = col / bs; 344 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 345 if (roworiented) value = v[i * n + j]; 346 else value = v[i + j * m]; 347 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 348 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 349 } else if (in[j] < 0) { 350 continue; 351 } else { 352 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 353 /* off-diag entry (B) */ 354 if (mat->was_assembled) { 355 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 356 #if defined(PETSC_USE_CTABLE) 357 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 358 col = col - 1; 359 #else 360 col = baij->colmap[in[j] / bs] - 1; 361 #endif 362 if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 363 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 364 col = in[j]; 365 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 366 B = baij->B; 367 b = (Mat_SeqBAIJ *)(B)->data; 368 bimax = b->imax; 369 bi = b->i; 370 bilen = b->ilen; 371 bj = b->j; 372 ba = b->a; 373 } else col += in[j] % bs; 374 } else col = in[j]; 375 if (roworiented) value = v[i * n + j]; 376 else value = v[i + j * m]; 377 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 378 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 379 } 380 } 381 } else { /* off processor entry */ 382 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 383 if (!baij->donotstash) { 384 mat->assembled = PETSC_FALSE; 385 n_loc = 0; 386 for (j = 0; j < n; j++) { 387 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 388 in_loc[n_loc] = in[j]; 389 if (roworiented) { 390 v_loc[n_loc] = v[i * n + j]; 391 } else { 392 v_loc[n_loc] = v[j * m + i]; 393 } 394 n_loc++; 395 } 396 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 397 } 398 } 399 } 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 404 { 405 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 406 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 407 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 408 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 409 PetscBool roworiented = a->roworiented; 410 const PetscScalar *value = v; 411 MatScalar *ap, *aa = a->a, *bap; 412 413 PetscFunctionBegin; 414 if (col < row) { 415 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 416 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 417 } 418 rp = aj + ai[row]; 419 ap = aa + bs2 * ai[row]; 420 rmax = imax[row]; 421 nrow = ailen[row]; 422 value = v; 423 low = 0; 424 high = nrow; 425 426 while (high - low > 7) { 427 t = (low + high) / 2; 428 if (rp[t] > col) high = t; 429 else low = t; 430 } 431 for (i = low; i < high; i++) { 432 if (rp[i] > col) break; 433 if (rp[i] == col) { 434 bap = ap + bs2 * i; 435 if (roworiented) { 436 if (is == ADD_VALUES) { 437 for (ii = 0; ii < bs; ii++) { 438 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 439 } 440 } else { 441 for (ii = 0; ii < bs; ii++) { 442 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 443 } 444 } 445 } else { 446 if (is == ADD_VALUES) { 447 for (ii = 0; ii < bs; ii++) { 448 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 449 } 450 } else { 451 for (ii = 0; ii < bs; ii++) { 452 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 453 } 454 } 455 } 456 goto noinsert2; 457 } 458 } 459 if (nonew == 1) goto noinsert2; 460 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 461 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 462 N = nrow++ - 1; 463 high++; 464 /* shift up all the later entries in this row */ 465 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 466 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 467 rp[i] = col; 468 bap = ap + bs2 * i; 469 if (roworiented) { 470 for (ii = 0; ii < bs; ii++) { 471 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 472 } 473 } else { 474 for (ii = 0; ii < bs; ii++) { 475 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 476 } 477 } 478 noinsert2:; 479 ailen[row] = nrow; 480 PetscFunctionReturn(PETSC_SUCCESS); 481 } 482 483 /* 484 This routine is exactly duplicated in mpibaij.c 485 */ 486 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 487 { 488 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 489 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 490 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 491 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 492 PetscBool roworiented = a->roworiented; 493 const PetscScalar *value = v; 494 MatScalar *ap, *aa = a->a, *bap; 495 496 PetscFunctionBegin; 497 rp = aj + ai[row]; 498 ap = aa + bs2 * ai[row]; 499 rmax = imax[row]; 500 nrow = ailen[row]; 501 low = 0; 502 high = nrow; 503 value = v; 504 while (high - low > 7) { 505 t = (low + high) / 2; 506 if (rp[t] > col) high = t; 507 else low = t; 508 } 509 for (i = low; i < high; i++) { 510 if (rp[i] > col) break; 511 if (rp[i] == col) { 512 bap = ap + bs2 * i; 513 if (roworiented) { 514 if (is == ADD_VALUES) { 515 for (ii = 0; ii < bs; ii++) { 516 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 517 } 518 } else { 519 for (ii = 0; ii < bs; ii++) { 520 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 521 } 522 } 523 } else { 524 if (is == ADD_VALUES) { 525 for (ii = 0; ii < bs; ii++, value += bs) { 526 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 527 bap += bs; 528 } 529 } else { 530 for (ii = 0; ii < bs; ii++, value += bs) { 531 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 532 bap += bs; 533 } 534 } 535 } 536 goto noinsert2; 537 } 538 } 539 if (nonew == 1) goto noinsert2; 540 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 541 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 542 N = nrow++ - 1; 543 high++; 544 /* shift up all the later entries in this row */ 545 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 546 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 547 rp[i] = col; 548 bap = ap + bs2 * i; 549 if (roworiented) { 550 for (ii = 0; ii < bs; ii++) { 551 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 552 } 553 } else { 554 for (ii = 0; ii < bs; ii++) { 555 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 556 } 557 } 558 noinsert2:; 559 ailen[row] = nrow; 560 PetscFunctionReturn(PETSC_SUCCESS); 561 } 562 563 /* 564 This routine could be optimized by removing the need for the block copy below and passing stride information 565 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 566 */ 567 static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 568 { 569 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 570 const MatScalar *value; 571 MatScalar *barray = baij->barray; 572 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 573 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 574 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 575 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 576 577 PetscFunctionBegin; 578 if (!barray) { 579 PetscCall(PetscMalloc1(bs2, &barray)); 580 baij->barray = barray; 581 } 582 583 if (roworiented) { 584 stepval = (n - 1) * bs; 585 } else { 586 stepval = (m - 1) * bs; 587 } 588 for (i = 0; i < m; i++) { 589 if (im[i] < 0) continue; 590 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 591 if (im[i] >= rstart && im[i] < rend) { 592 row = im[i] - rstart; 593 for (j = 0; j < n; j++) { 594 if (im[i] > in[j]) { 595 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 596 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 597 } 598 /* If NumCol = 1 then a copy is not required */ 599 if ((roworiented) && (n == 1)) { 600 barray = (MatScalar *)v + i * bs2; 601 } else if ((!roworiented) && (m == 1)) { 602 barray = (MatScalar *)v + j * bs2; 603 } else { /* Here a copy is required */ 604 if (roworiented) { 605 value = v + i * (stepval + bs) * bs + j * bs; 606 } else { 607 value = v + j * (stepval + bs) * bs + i * bs; 608 } 609 for (ii = 0; ii < bs; ii++, value += stepval) { 610 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 611 } 612 barray -= bs2; 613 } 614 615 if (in[j] >= cstart && in[j] < cend) { 616 col = in[j] - cstart; 617 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 618 } else if (in[j] < 0) { 619 continue; 620 } else { 621 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 622 if (mat->was_assembled) { 623 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 624 625 #if defined(PETSC_USE_DEBUG) 626 #if defined(PETSC_USE_CTABLE) 627 { 628 PetscInt data; 629 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 630 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 631 } 632 #else 633 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 634 #endif 635 #endif 636 #if defined(PETSC_USE_CTABLE) 637 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 638 col = (col - 1) / bs; 639 #else 640 col = (baij->colmap[in[j]] - 1) / bs; 641 #endif 642 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 643 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 644 col = in[j]; 645 } 646 } else col = in[j]; 647 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 648 } 649 } 650 } else { 651 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 652 if (!baij->donotstash) { 653 if (roworiented) { 654 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 655 } else { 656 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 657 } 658 } 659 } 660 } 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 665 { 666 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 667 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 668 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 669 670 PetscFunctionBegin; 671 for (i = 0; i < m; i++) { 672 if (idxm[i] < 0) continue; /* negative row */ 673 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 674 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 675 row = idxm[i] - bsrstart; 676 for (j = 0; j < n; j++) { 677 if (idxn[j] < 0) continue; /* negative column */ 678 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 679 if (idxn[j] >= bscstart && idxn[j] < bscend) { 680 col = idxn[j] - bscstart; 681 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 682 } else { 683 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 684 #if defined(PETSC_USE_CTABLE) 685 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 686 data--; 687 #else 688 data = baij->colmap[idxn[j] / bs] - 1; 689 #endif 690 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 691 else { 692 col = data + idxn[j] % bs; 693 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 694 } 695 } 696 } 697 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 698 } 699 PetscFunctionReturn(PETSC_SUCCESS); 700 } 701 702 static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 703 { 704 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 705 PetscReal sum[2], *lnorm2; 706 707 PetscFunctionBegin; 708 if (baij->size == 1) { 709 PetscCall(MatNorm(baij->A, type, norm)); 710 } else { 711 if (type == NORM_FROBENIUS) { 712 PetscCall(PetscMalloc1(2, &lnorm2)); 713 PetscCall(MatNorm(baij->A, type, lnorm2)); 714 *lnorm2 = (*lnorm2) * (*lnorm2); 715 lnorm2++; /* squar power of norm(A) */ 716 PetscCall(MatNorm(baij->B, type, lnorm2)); 717 *lnorm2 = (*lnorm2) * (*lnorm2); 718 lnorm2--; /* squar power of norm(B) */ 719 PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 720 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 721 PetscCall(PetscFree(lnorm2)); 722 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 723 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 724 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 725 PetscReal *rsum, *rsum2, vabs; 726 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 727 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 728 MatScalar *v; 729 730 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 731 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 732 /* Amat */ 733 v = amat->a; 734 jj = amat->j; 735 for (brow = 0; brow < mbs; brow++) { 736 grow = bs * (rstart + brow); 737 nz = amat->i[brow + 1] - amat->i[brow]; 738 for (bcol = 0; bcol < nz; bcol++) { 739 gcol = bs * (rstart + *jj); 740 jj++; 741 for (col = 0; col < bs; col++) { 742 for (row = 0; row < bs; row++) { 743 vabs = PetscAbsScalar(*v); 744 v++; 745 rsum[gcol + col] += vabs; 746 /* non-diagonal block */ 747 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 748 } 749 } 750 } 751 PetscCall(PetscLogFlops(nz * bs * bs)); 752 } 753 /* Bmat */ 754 v = bmat->a; 755 jj = bmat->j; 756 for (brow = 0; brow < mbs; brow++) { 757 grow = bs * (rstart + brow); 758 nz = bmat->i[brow + 1] - bmat->i[brow]; 759 for (bcol = 0; bcol < nz; bcol++) { 760 gcol = bs * garray[*jj]; 761 jj++; 762 for (col = 0; col < bs; col++) { 763 for (row = 0; row < bs; row++) { 764 vabs = PetscAbsScalar(*v); 765 v++; 766 rsum[gcol + col] += vabs; 767 rsum[grow + row] += vabs; 768 } 769 } 770 } 771 PetscCall(PetscLogFlops(nz * bs * bs)); 772 } 773 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 774 *norm = 0.0; 775 for (col = 0; col < mat->cmap->N; col++) { 776 if (rsum2[col] > *norm) *norm = rsum2[col]; 777 } 778 PetscCall(PetscFree2(rsum, rsum2)); 779 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 780 } 781 PetscFunctionReturn(PETSC_SUCCESS); 782 } 783 784 static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 785 { 786 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 787 PetscInt nstash, reallocs; 788 789 PetscFunctionBegin; 790 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 791 792 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 793 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 794 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 795 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 796 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 797 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 798 PetscFunctionReturn(PETSC_SUCCESS); 799 } 800 801 static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 802 { 803 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 804 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 805 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 806 PetscInt *row, *col; 807 PetscBool other_disassembled; 808 PetscMPIInt n; 809 PetscBool r1, r2, r3; 810 MatScalar *val; 811 812 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 813 PetscFunctionBegin; 814 if (!baij->donotstash && !mat->nooffprocentries) { 815 while (1) { 816 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 817 if (!flg) break; 818 819 for (i = 0; i < n;) { 820 /* Now identify the consecutive vals belonging to the same row */ 821 for (j = i, rstart = row[j]; j < n; j++) { 822 if (row[j] != rstart) break; 823 } 824 if (j < n) ncols = j - i; 825 else ncols = n - i; 826 /* Now assemble all these values with a single function call */ 827 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 828 i = j; 829 } 830 } 831 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 832 /* Now process the block-stash. Since the values are stashed column-oriented, 833 set the roworiented flag to column oriented, and after MatSetValues() 834 restore the original flags */ 835 r1 = baij->roworiented; 836 r2 = a->roworiented; 837 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 838 839 baij->roworiented = PETSC_FALSE; 840 a->roworiented = PETSC_FALSE; 841 842 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 843 while (1) { 844 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 845 if (!flg) break; 846 847 for (i = 0; i < n;) { 848 /* Now identify the consecutive vals belonging to the same row */ 849 for (j = i, rstart = row[j]; j < n; j++) { 850 if (row[j] != rstart) break; 851 } 852 if (j < n) ncols = j - i; 853 else ncols = n - i; 854 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 855 i = j; 856 } 857 } 858 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 859 860 baij->roworiented = r1; 861 a->roworiented = r2; 862 863 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 864 } 865 866 PetscCall(MatAssemblyBegin(baij->A, mode)); 867 PetscCall(MatAssemblyEnd(baij->A, mode)); 868 869 /* determine if any processor has disassembled, if so we must 870 also disassemble ourselves, in order that we may reassemble. */ 871 /* 872 if nonzero structure of submatrix B cannot change then we know that 873 no processor disassembled thus we can skip this stuff 874 */ 875 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 876 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 877 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 878 } 879 880 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 881 PetscCall(MatAssemblyBegin(baij->B, mode)); 882 PetscCall(MatAssemblyEnd(baij->B, mode)); 883 884 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 885 886 baij->rowvalues = NULL; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 890 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 891 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 892 } 893 PetscFunctionReturn(PETSC_SUCCESS); 894 } 895 896 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 897 #include <petscdraw.h> 898 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 899 { 900 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 901 PetscInt bs = mat->rmap->bs; 902 PetscMPIInt rank = baij->rank; 903 PetscBool iascii, isdraw; 904 PetscViewer sviewer; 905 PetscViewerFormat format; 906 907 PetscFunctionBegin; 908 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 909 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 910 if (iascii) { 911 PetscCall(PetscViewerGetFormat(viewer, &format)); 912 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 913 MatInfo info; 914 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 915 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 916 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 917 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 918 mat->rmap->bs, (double)info.memory)); 919 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 920 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 921 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 922 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 923 PetscCall(PetscViewerFlush(viewer)); 924 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 925 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 926 PetscCall(VecScatterView(baij->Mvctx, viewer)); 927 PetscFunctionReturn(PETSC_SUCCESS); 928 } else if (format == PETSC_VIEWER_ASCII_INFO) { 929 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 930 PetscFunctionReturn(PETSC_SUCCESS); 931 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 932 PetscFunctionReturn(PETSC_SUCCESS); 933 } 934 } 935 936 if (isdraw) { 937 PetscDraw draw; 938 PetscBool isnull; 939 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 940 PetscCall(PetscDrawIsNull(draw, &isnull)); 941 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 942 } 943 944 { 945 /* assemble the entire matrix onto first processor. */ 946 Mat A; 947 Mat_SeqSBAIJ *Aloc; 948 Mat_SeqBAIJ *Bloc; 949 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 950 MatScalar *a; 951 const char *matname; 952 953 /* Should this be the same type as mat? */ 954 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 955 if (rank == 0) { 956 PetscCall(MatSetSizes(A, M, N, M, N)); 957 } else { 958 PetscCall(MatSetSizes(A, 0, 0, M, N)); 959 } 960 PetscCall(MatSetType(A, MATMPISBAIJ)); 961 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 962 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 963 964 /* copy over the A part */ 965 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 966 ai = Aloc->i; 967 aj = Aloc->j; 968 a = Aloc->a; 969 PetscCall(PetscMalloc1(bs, &rvals)); 970 971 for (i = 0; i < mbs; i++) { 972 rvals[0] = bs * (baij->rstartbs + i); 973 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 974 for (j = ai[i]; j < ai[i + 1]; j++) { 975 col = (baij->cstartbs + aj[j]) * bs; 976 for (k = 0; k < bs; k++) { 977 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 978 col++; 979 a += bs; 980 } 981 } 982 } 983 /* copy over the B part */ 984 Bloc = (Mat_SeqBAIJ *)baij->B->data; 985 ai = Bloc->i; 986 aj = Bloc->j; 987 a = Bloc->a; 988 for (i = 0; i < mbs; i++) { 989 rvals[0] = bs * (baij->rstartbs + i); 990 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 991 for (j = ai[i]; j < ai[i + 1]; j++) { 992 col = baij->garray[aj[j]] * bs; 993 for (k = 0; k < bs; k++) { 994 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 995 col++; 996 a += bs; 997 } 998 } 999 } 1000 PetscCall(PetscFree(rvals)); 1001 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1002 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1003 /* 1004 Everyone has to call to draw the matrix since the graphics waits are 1005 synchronized across all processors that share the PetscDraw object 1006 */ 1007 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1008 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1009 if (rank == 0) { 1010 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 1011 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 1012 } 1013 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1014 PetscCall(MatDestroy(&A)); 1015 } 1016 PetscFunctionReturn(PETSC_SUCCESS); 1017 } 1018 1019 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1020 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 1021 1022 static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 1023 { 1024 PetscBool iascii, isdraw, issocket, isbinary; 1025 1026 PetscFunctionBegin; 1027 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1028 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1029 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1030 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1031 if (iascii || isdraw || issocket) { 1032 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 1033 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 1034 PetscFunctionReturn(PETSC_SUCCESS); 1035 } 1036 1037 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1038 { 1039 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1040 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1041 PetscScalar *from; 1042 const PetscScalar *x; 1043 1044 PetscFunctionBegin; 1045 /* diagonal part */ 1046 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1047 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1048 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1049 PetscCall(VecZeroEntries(a->slvec1b)); 1050 1051 /* subdiagonal part */ 1052 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1053 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1054 1055 /* copy x into the vec slvec0 */ 1056 PetscCall(VecGetArray(a->slvec0, &from)); 1057 PetscCall(VecGetArrayRead(xx, &x)); 1058 1059 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1060 PetscCall(VecRestoreArray(a->slvec0, &from)); 1061 PetscCall(VecRestoreArrayRead(xx, &x)); 1062 1063 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1064 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1065 /* supperdiagonal part */ 1066 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1073 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1074 PetscScalar *from; 1075 const PetscScalar *x; 1076 1077 PetscFunctionBegin; 1078 /* diagonal part */ 1079 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1080 /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */ 1081 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1082 PetscCall(VecZeroEntries(a->slvec1b)); 1083 1084 /* subdiagonal part */ 1085 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1086 1087 /* copy x into the vec slvec0 */ 1088 PetscCall(VecGetArray(a->slvec0, &from)); 1089 PetscCall(VecGetArrayRead(xx, &x)); 1090 1091 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1092 PetscCall(VecRestoreArray(a->slvec0, &from)); 1093 PetscCall(VecRestoreArrayRead(xx, &x)); 1094 1095 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1096 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1097 /* supperdiagonal part */ 1098 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy)); 1099 PetscFunctionReturn(PETSC_SUCCESS); 1100 } 1101 1102 #if PetscDefined(USE_COMPLEX) 1103 static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz) 1104 { 1105 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1106 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1107 PetscScalar *from; 1108 const PetscScalar *x; 1109 1110 PetscFunctionBegin; 1111 /* diagonal part */ 1112 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1113 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1114 PetscCall(VecZeroEntries(a->slvec1b)); 1115 1116 /* subdiagonal part */ 1117 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1118 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1119 1120 /* copy x into the vec slvec0 */ 1121 PetscCall(VecGetArray(a->slvec0, &from)); 1122 PetscCall(VecGetArrayRead(xx, &x)); 1123 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1124 PetscCall(VecRestoreArray(a->slvec0, &from)); 1125 1126 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1127 PetscCall(VecRestoreArrayRead(xx, &x)); 1128 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1129 1130 /* supperdiagonal part */ 1131 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 #endif 1135 1136 static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1137 { 1138 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1139 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1140 PetscScalar *from; 1141 const PetscScalar *x; 1142 1143 PetscFunctionBegin; 1144 /* diagonal part */ 1145 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a)); 1146 PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b)); 1147 PetscCall(VecZeroEntries(a->slvec1b)); 1148 1149 /* subdiagonal part */ 1150 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b)); 1151 1152 /* copy x into the vec slvec0 */ 1153 PetscCall(VecGetArray(a->slvec0, &from)); 1154 PetscCall(VecGetArrayRead(xx, &x)); 1155 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1156 PetscCall(VecRestoreArray(a->slvec0, &from)); 1157 1158 PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1159 PetscCall(VecRestoreArrayRead(xx, &x)); 1160 PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD)); 1161 1162 /* supperdiagonal part */ 1163 PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz)); 1164 PetscFunctionReturn(PETSC_SUCCESS); 1165 } 1166 1167 /* 1168 This only works correctly for square matrices where the subblock A->A is the 1169 diagonal block 1170 */ 1171 static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v) 1172 { 1173 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1174 1175 PetscFunctionBegin; 1176 /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */ 1177 PetscCall(MatGetDiagonal(a->A, v)); 1178 PetscFunctionReturn(PETSC_SUCCESS); 1179 } 1180 1181 static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa) 1182 { 1183 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1184 1185 PetscFunctionBegin; 1186 PetscCall(MatScale(a->A, aa)); 1187 PetscCall(MatScale(a->B, aa)); 1188 PetscFunctionReturn(PETSC_SUCCESS); 1189 } 1190 1191 static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1192 { 1193 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 1194 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1195 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1196 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1197 PetscInt *cmap, *idx_p, cstart = mat->rstartbs; 1198 1199 PetscFunctionBegin; 1200 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1201 mat->getrowactive = PETSC_TRUE; 1202 1203 if (!mat->rowvalues && (idx || v)) { 1204 /* 1205 allocate enough space to hold information from the longest row. 1206 */ 1207 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1208 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1209 PetscInt max = 1, mbs = mat->mbs, tmp; 1210 for (i = 0; i < mbs; i++) { 1211 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1212 if (max < tmp) max = tmp; 1213 } 1214 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1215 } 1216 1217 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1218 lrow = row - brstart; /* local row index */ 1219 1220 pvA = &vworkA; 1221 pcA = &cworkA; 1222 pvB = &vworkB; 1223 pcB = &cworkB; 1224 if (!v) { 1225 pvA = NULL; 1226 pvB = NULL; 1227 } 1228 if (!idx) { 1229 pcA = NULL; 1230 if (!v) pcB = NULL; 1231 } 1232 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1233 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1234 nztot = nzA + nzB; 1235 1236 cmap = mat->garray; 1237 if (v || idx) { 1238 if (nztot) { 1239 /* Sort by increasing column numbers, assuming A and B already sorted */ 1240 PetscInt imark = -1; 1241 if (v) { 1242 *v = v_p = mat->rowvalues; 1243 for (i = 0; i < nzB; i++) { 1244 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1245 else break; 1246 } 1247 imark = i; 1248 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1249 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1250 } 1251 if (idx) { 1252 *idx = idx_p = mat->rowindices; 1253 if (imark > -1) { 1254 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1255 } else { 1256 for (i = 0; i < nzB; i++) { 1257 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1258 else break; 1259 } 1260 imark = i; 1261 } 1262 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1263 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1264 } 1265 } else { 1266 if (idx) *idx = NULL; 1267 if (v) *v = NULL; 1268 } 1269 } 1270 *nz = nztot; 1271 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1272 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1273 PetscFunctionReturn(PETSC_SUCCESS); 1274 } 1275 1276 static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1277 { 1278 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1279 1280 PetscFunctionBegin; 1281 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1282 baij->getrowactive = PETSC_FALSE; 1283 PetscFunctionReturn(PETSC_SUCCESS); 1284 } 1285 1286 static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1287 { 1288 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1289 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1290 1291 PetscFunctionBegin; 1292 aA->getrow_utriangular = PETSC_TRUE; 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1296 { 1297 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1298 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1299 1300 PetscFunctionBegin; 1301 aA->getrow_utriangular = PETSC_FALSE; 1302 PetscFunctionReturn(PETSC_SUCCESS); 1303 } 1304 1305 static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1306 { 1307 PetscFunctionBegin; 1308 if (PetscDefined(USE_COMPLEX)) { 1309 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1310 1311 PetscCall(MatConjugate(a->A)); 1312 PetscCall(MatConjugate(a->B)); 1313 } 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } 1316 1317 static PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1318 { 1319 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1320 1321 PetscFunctionBegin; 1322 PetscCall(MatRealPart(a->A)); 1323 PetscCall(MatRealPart(a->B)); 1324 PetscFunctionReturn(PETSC_SUCCESS); 1325 } 1326 1327 static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1328 { 1329 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1330 1331 PetscFunctionBegin; 1332 PetscCall(MatImaginaryPart(a->A)); 1333 PetscCall(MatImaginaryPart(a->B)); 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } 1336 1337 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1338 Input: isrow - distributed(parallel), 1339 iscol_local - locally owned (seq) 1340 */ 1341 static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1342 { 1343 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1344 const PetscInt *ptr1, *ptr2; 1345 1346 PetscFunctionBegin; 1347 *flg = PETSC_FALSE; 1348 PetscCall(ISGetLocalSize(isrow, &sz1)); 1349 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1350 if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS); 1351 1352 PetscCall(ISGetIndices(isrow, &ptr1)); 1353 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1354 1355 PetscCall(PetscMalloc1(sz1, &a1)); 1356 PetscCall(PetscMalloc1(sz2, &a2)); 1357 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1358 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1359 PetscCall(PetscSortInt(sz1, a1)); 1360 PetscCall(PetscSortInt(sz2, a2)); 1361 1362 nmatch = 0; 1363 k = 0; 1364 for (i = 0; i < sz1; i++) { 1365 for (j = k; j < sz2; j++) { 1366 if (a1[i] == a2[j]) { 1367 k = j; 1368 nmatch++; 1369 break; 1370 } 1371 } 1372 } 1373 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1374 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1375 PetscCall(PetscFree(a1)); 1376 PetscCall(PetscFree(a2)); 1377 if (nmatch < sz1) { 1378 *flg = PETSC_FALSE; 1379 } else { 1380 *flg = PETSC_TRUE; 1381 } 1382 PetscFunctionReturn(PETSC_SUCCESS); 1383 } 1384 1385 static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1386 { 1387 Mat C[2]; 1388 IS iscol_local, isrow_local; 1389 PetscInt csize, csize_local, rsize; 1390 PetscBool isequal, issorted, isidentity = PETSC_FALSE; 1391 1392 PetscFunctionBegin; 1393 PetscCall(ISGetLocalSize(iscol, &csize)); 1394 PetscCall(ISGetLocalSize(isrow, &rsize)); 1395 if (call == MAT_REUSE_MATRIX) { 1396 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1397 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1398 } else { 1399 PetscCall(ISAllGather(iscol, &iscol_local)); 1400 PetscCall(ISSorted(iscol_local, &issorted)); 1401 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted"); 1402 } 1403 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1404 if (!isequal) { 1405 PetscCall(ISGetLocalSize(iscol_local, &csize_local)); 1406 isidentity = (PetscBool)(mat->cmap->N == csize_local); 1407 if (!isidentity) { 1408 if (call == MAT_REUSE_MATRIX) { 1409 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local)); 1410 PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1411 } else { 1412 PetscCall(ISAllGather(isrow, &isrow_local)); 1413 PetscCall(ISSorted(isrow_local, &issorted)); 1414 PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted"); 1415 } 1416 } 1417 } 1418 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1419 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity))); 1420 if (!isequal && !isidentity) { 1421 if (call == MAT_INITIAL_MATRIX) { 1422 IS intersect; 1423 PetscInt ni; 1424 1425 PetscCall(ISIntersect(isrow_local, iscol_local, &intersect)); 1426 PetscCall(ISGetLocalSize(intersect, &ni)); 1427 PetscCall(ISDestroy(&intersect)); 1428 PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni); 1429 } 1430 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE)); 1431 PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1)); 1432 PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN)); 1433 if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1434 else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat)); 1435 else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN)); 1436 PetscCall(MatDestroy(C)); 1437 PetscCall(MatDestroy(C + 1)); 1438 } 1439 if (call == MAT_INITIAL_MATRIX) { 1440 if (!isequal && !isidentity) { 1441 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local)); 1442 PetscCall(ISDestroy(&isrow_local)); 1443 } 1444 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1445 PetscCall(ISDestroy(&iscol_local)); 1446 } 1447 PetscFunctionReturn(PETSC_SUCCESS); 1448 } 1449 1450 static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1451 { 1452 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1453 1454 PetscFunctionBegin; 1455 PetscCall(MatZeroEntries(l->A)); 1456 PetscCall(MatZeroEntries(l->B)); 1457 PetscFunctionReturn(PETSC_SUCCESS); 1458 } 1459 1460 static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1461 { 1462 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1463 Mat A = a->A, B = a->B; 1464 PetscLogDouble isend[5], irecv[5]; 1465 1466 PetscFunctionBegin; 1467 info->block_size = (PetscReal)matin->rmap->bs; 1468 1469 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1470 1471 isend[0] = info->nz_used; 1472 isend[1] = info->nz_allocated; 1473 isend[2] = info->nz_unneeded; 1474 isend[3] = info->memory; 1475 isend[4] = info->mallocs; 1476 1477 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1478 1479 isend[0] += info->nz_used; 1480 isend[1] += info->nz_allocated; 1481 isend[2] += info->nz_unneeded; 1482 isend[3] += info->memory; 1483 isend[4] += info->mallocs; 1484 if (flag == MAT_LOCAL) { 1485 info->nz_used = isend[0]; 1486 info->nz_allocated = isend[1]; 1487 info->nz_unneeded = isend[2]; 1488 info->memory = isend[3]; 1489 info->mallocs = isend[4]; 1490 } else if (flag == MAT_GLOBAL_MAX) { 1491 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1492 1493 info->nz_used = irecv[0]; 1494 info->nz_allocated = irecv[1]; 1495 info->nz_unneeded = irecv[2]; 1496 info->memory = irecv[3]; 1497 info->mallocs = irecv[4]; 1498 } else if (flag == MAT_GLOBAL_SUM) { 1499 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1500 1501 info->nz_used = irecv[0]; 1502 info->nz_allocated = irecv[1]; 1503 info->nz_unneeded = irecv[2]; 1504 info->memory = irecv[3]; 1505 info->mallocs = irecv[4]; 1506 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1507 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1508 info->fill_ratio_needed = 0; 1509 info->factor_mallocs = 0; 1510 PetscFunctionReturn(PETSC_SUCCESS); 1511 } 1512 1513 static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1514 { 1515 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1516 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1517 1518 PetscFunctionBegin; 1519 switch (op) { 1520 case MAT_NEW_NONZERO_LOCATIONS: 1521 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1522 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1523 case MAT_KEEP_NONZERO_PATTERN: 1524 case MAT_SUBMAT_SINGLEIS: 1525 case MAT_NEW_NONZERO_LOCATION_ERR: 1526 MatCheckPreallocated(A, 1); 1527 PetscCall(MatSetOption(a->A, op, flg)); 1528 PetscCall(MatSetOption(a->B, op, flg)); 1529 break; 1530 case MAT_ROW_ORIENTED: 1531 MatCheckPreallocated(A, 1); 1532 a->roworiented = flg; 1533 1534 PetscCall(MatSetOption(a->A, op, flg)); 1535 PetscCall(MatSetOption(a->B, op, flg)); 1536 break; 1537 case MAT_FORCE_DIAGONAL_ENTRIES: 1538 case MAT_SORTED_FULL: 1539 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1540 break; 1541 case MAT_IGNORE_OFF_PROC_ENTRIES: 1542 a->donotstash = flg; 1543 break; 1544 case MAT_USE_HASH_TABLE: 1545 a->ht_flag = flg; 1546 break; 1547 case MAT_HERMITIAN: 1548 MatCheckPreallocated(A, 1); 1549 PetscCall(MatSetOption(a->A, op, flg)); 1550 #if defined(PETSC_USE_COMPLEX) 1551 if (flg) { /* need different mat-vec ops */ 1552 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1553 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1554 A->ops->multtranspose = NULL; 1555 A->ops->multtransposeadd = NULL; 1556 A->symmetric = PETSC_BOOL3_FALSE; 1557 } 1558 #endif 1559 break; 1560 case MAT_SPD: 1561 case MAT_SYMMETRIC: 1562 MatCheckPreallocated(A, 1); 1563 PetscCall(MatSetOption(a->A, op, flg)); 1564 #if defined(PETSC_USE_COMPLEX) 1565 if (flg) { /* restore to use default mat-vec ops */ 1566 A->ops->mult = MatMult_MPISBAIJ; 1567 A->ops->multadd = MatMultAdd_MPISBAIJ; 1568 A->ops->multtranspose = MatMult_MPISBAIJ; 1569 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1570 } 1571 #endif 1572 break; 1573 case MAT_STRUCTURALLY_SYMMETRIC: 1574 MatCheckPreallocated(A, 1); 1575 PetscCall(MatSetOption(a->A, op, flg)); 1576 break; 1577 case MAT_SYMMETRY_ETERNAL: 1578 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1579 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1580 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1581 break; 1582 case MAT_SPD_ETERNAL: 1583 break; 1584 case MAT_IGNORE_LOWER_TRIANGULAR: 1585 aA->ignore_ltriangular = flg; 1586 break; 1587 case MAT_ERROR_LOWER_TRIANGULAR: 1588 aA->ignore_ltriangular = flg; 1589 break; 1590 case MAT_GETROW_UPPERTRIANGULAR: 1591 aA->getrow_utriangular = flg; 1592 break; 1593 default: 1594 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1595 } 1596 PetscFunctionReturn(PETSC_SUCCESS); 1597 } 1598 1599 static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1600 { 1601 PetscFunctionBegin; 1602 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1603 if (reuse == MAT_INITIAL_MATRIX) { 1604 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1605 } else if (reuse == MAT_REUSE_MATRIX) { 1606 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1607 } 1608 PetscFunctionReturn(PETSC_SUCCESS); 1609 } 1610 1611 static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1612 { 1613 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1614 Mat a = baij->A, b = baij->B; 1615 PetscInt nv, m, n; 1616 PetscBool flg; 1617 1618 PetscFunctionBegin; 1619 if (ll != rr) { 1620 PetscCall(VecEqual(ll, rr, &flg)); 1621 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1622 } 1623 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1624 1625 PetscCall(MatGetLocalSize(mat, &m, &n)); 1626 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1627 1628 PetscCall(VecGetLocalSize(rr, &nv)); 1629 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1630 1631 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1632 1633 /* left diagonalscale the off-diagonal part */ 1634 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1635 1636 /* scale the diagonal part */ 1637 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1638 1639 /* right diagonalscale the off-diagonal part */ 1640 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1641 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1646 { 1647 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1648 1649 PetscFunctionBegin; 1650 PetscCall(MatSetUnfactored(a->A)); 1651 PetscFunctionReturn(PETSC_SUCCESS); 1652 } 1653 1654 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1655 1656 static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1657 { 1658 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1659 Mat a, b, c, d; 1660 PetscBool flg; 1661 1662 PetscFunctionBegin; 1663 a = matA->A; 1664 b = matA->B; 1665 c = matB->A; 1666 d = matB->B; 1667 1668 PetscCall(MatEqual(a, c, &flg)); 1669 if (flg) PetscCall(MatEqual(b, d, &flg)); 1670 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1671 PetscFunctionReturn(PETSC_SUCCESS); 1672 } 1673 1674 static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1675 { 1676 PetscBool isbaij; 1677 1678 PetscFunctionBegin; 1679 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1680 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1681 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1682 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1683 PetscCall(MatGetRowUpperTriangular(A)); 1684 PetscCall(MatCopy_Basic(A, B, str)); 1685 PetscCall(MatRestoreRowUpperTriangular(A)); 1686 } else { 1687 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1688 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1689 1690 PetscCall(MatCopy(a->A, b->A, str)); 1691 PetscCall(MatCopy(a->B, b->B, str)); 1692 } 1693 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1694 PetscFunctionReturn(PETSC_SUCCESS); 1695 } 1696 1697 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1698 { 1699 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1700 PetscBLASInt bnz, one = 1; 1701 Mat_SeqSBAIJ *xa, *ya; 1702 Mat_SeqBAIJ *xb, *yb; 1703 1704 PetscFunctionBegin; 1705 if (str == SAME_NONZERO_PATTERN) { 1706 PetscScalar alpha = a; 1707 xa = (Mat_SeqSBAIJ *)xx->A->data; 1708 ya = (Mat_SeqSBAIJ *)yy->A->data; 1709 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1710 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1711 xb = (Mat_SeqBAIJ *)xx->B->data; 1712 yb = (Mat_SeqBAIJ *)yy->B->data; 1713 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1714 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1715 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1716 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1717 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1718 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1719 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1720 } else { 1721 Mat B; 1722 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1723 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1724 PetscCall(MatGetRowUpperTriangular(X)); 1725 PetscCall(MatGetRowUpperTriangular(Y)); 1726 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1727 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1728 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1729 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1730 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1731 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1732 PetscCall(MatSetType(B, MATMPISBAIJ)); 1733 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1734 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1735 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1736 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1737 PetscCall(MatHeaderMerge(Y, &B)); 1738 PetscCall(PetscFree(nnz_d)); 1739 PetscCall(PetscFree(nnz_o)); 1740 PetscCall(MatRestoreRowUpperTriangular(X)); 1741 PetscCall(MatRestoreRowUpperTriangular(Y)); 1742 } 1743 PetscFunctionReturn(PETSC_SUCCESS); 1744 } 1745 1746 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1747 { 1748 PetscInt i; 1749 PetscBool flg; 1750 1751 PetscFunctionBegin; 1752 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1753 for (i = 0; i < n; i++) { 1754 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1755 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1756 } 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1761 { 1762 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1763 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1764 1765 PetscFunctionBegin; 1766 if (!Y->preallocated) { 1767 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1768 } else if (!aij->nz) { 1769 PetscInt nonew = aij->nonew; 1770 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1771 aij->nonew = nonew; 1772 } 1773 PetscCall(MatShift_Basic(Y, a)); 1774 PetscFunctionReturn(PETSC_SUCCESS); 1775 } 1776 1777 static PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1778 { 1779 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1780 1781 PetscFunctionBegin; 1782 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1783 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1784 if (d) { 1785 PetscInt rstart; 1786 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1787 *d += rstart / A->rmap->bs; 1788 } 1789 PetscFunctionReturn(PETSC_SUCCESS); 1790 } 1791 1792 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1793 { 1794 PetscFunctionBegin; 1795 *a = ((Mat_MPISBAIJ *)A->data)->A; 1796 PetscFunctionReturn(PETSC_SUCCESS); 1797 } 1798 1799 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1800 { 1801 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1802 1803 PetscFunctionBegin; 1804 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1805 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1806 PetscFunctionReturn(PETSC_SUCCESS); 1807 } 1808 1809 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1810 MatGetRow_MPISBAIJ, 1811 MatRestoreRow_MPISBAIJ, 1812 MatMult_MPISBAIJ, 1813 /* 4*/ MatMultAdd_MPISBAIJ, 1814 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1815 MatMultAdd_MPISBAIJ, 1816 NULL, 1817 NULL, 1818 NULL, 1819 /* 10*/ NULL, 1820 NULL, 1821 NULL, 1822 MatSOR_MPISBAIJ, 1823 MatTranspose_MPISBAIJ, 1824 /* 15*/ MatGetInfo_MPISBAIJ, 1825 MatEqual_MPISBAIJ, 1826 MatGetDiagonal_MPISBAIJ, 1827 MatDiagonalScale_MPISBAIJ, 1828 MatNorm_MPISBAIJ, 1829 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1830 MatAssemblyEnd_MPISBAIJ, 1831 MatSetOption_MPISBAIJ, 1832 MatZeroEntries_MPISBAIJ, 1833 /* 24*/ NULL, 1834 NULL, 1835 NULL, 1836 NULL, 1837 NULL, 1838 /* 29*/ MatSetUp_MPI_Hash, 1839 NULL, 1840 NULL, 1841 MatGetDiagonalBlock_MPISBAIJ, 1842 NULL, 1843 /* 34*/ MatDuplicate_MPISBAIJ, 1844 NULL, 1845 NULL, 1846 NULL, 1847 NULL, 1848 /* 39*/ MatAXPY_MPISBAIJ, 1849 MatCreateSubMatrices_MPISBAIJ, 1850 MatIncreaseOverlap_MPISBAIJ, 1851 MatGetValues_MPISBAIJ, 1852 MatCopy_MPISBAIJ, 1853 /* 44*/ NULL, 1854 MatScale_MPISBAIJ, 1855 MatShift_MPISBAIJ, 1856 NULL, 1857 NULL, 1858 /* 49*/ NULL, 1859 NULL, 1860 NULL, 1861 NULL, 1862 NULL, 1863 /* 54*/ NULL, 1864 NULL, 1865 MatSetUnfactored_MPISBAIJ, 1866 NULL, 1867 MatSetValuesBlocked_MPISBAIJ, 1868 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1869 NULL, 1870 NULL, 1871 NULL, 1872 NULL, 1873 /* 64*/ NULL, 1874 NULL, 1875 NULL, 1876 NULL, 1877 NULL, 1878 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1879 NULL, 1880 MatConvert_MPISBAIJ_Basic, 1881 NULL, 1882 NULL, 1883 /* 74*/ NULL, 1884 NULL, 1885 NULL, 1886 NULL, 1887 NULL, 1888 /* 79*/ NULL, 1889 NULL, 1890 NULL, 1891 NULL, 1892 MatLoad_MPISBAIJ, 1893 /* 84*/ NULL, 1894 NULL, 1895 NULL, 1896 NULL, 1897 NULL, 1898 /* 89*/ NULL, 1899 NULL, 1900 NULL, 1901 NULL, 1902 NULL, 1903 /* 94*/ NULL, 1904 NULL, 1905 NULL, 1906 NULL, 1907 NULL, 1908 /* 99*/ NULL, 1909 NULL, 1910 NULL, 1911 MatConjugate_MPISBAIJ, 1912 NULL, 1913 /*104*/ NULL, 1914 MatRealPart_MPISBAIJ, 1915 MatImaginaryPart_MPISBAIJ, 1916 MatGetRowUpperTriangular_MPISBAIJ, 1917 MatRestoreRowUpperTriangular_MPISBAIJ, 1918 /*109*/ NULL, 1919 NULL, 1920 NULL, 1921 NULL, 1922 MatMissingDiagonal_MPISBAIJ, 1923 /*114*/ NULL, 1924 NULL, 1925 NULL, 1926 NULL, 1927 NULL, 1928 /*119*/ NULL, 1929 NULL, 1930 NULL, 1931 NULL, 1932 NULL, 1933 /*124*/ NULL, 1934 NULL, 1935 NULL, 1936 NULL, 1937 NULL, 1938 /*129*/ NULL, 1939 NULL, 1940 NULL, 1941 NULL, 1942 NULL, 1943 /*134*/ NULL, 1944 NULL, 1945 NULL, 1946 NULL, 1947 NULL, 1948 /*139*/ MatSetBlockSizes_Default, 1949 NULL, 1950 NULL, 1951 NULL, 1952 NULL, 1953 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1954 NULL, 1955 NULL, 1956 NULL, 1957 NULL, 1958 NULL, 1959 /*150*/ NULL, 1960 MatEliminateZeros_MPISBAIJ}; 1961 1962 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1963 { 1964 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1965 PetscInt i, mbs, Mbs; 1966 PetscMPIInt size; 1967 1968 PetscFunctionBegin; 1969 if (B->hash_active) { 1970 B->ops[0] = b->cops; 1971 B->hash_active = PETSC_FALSE; 1972 } 1973 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1974 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1975 PetscCall(PetscLayoutSetUp(B->rmap)); 1976 PetscCall(PetscLayoutSetUp(B->cmap)); 1977 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1978 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1979 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1980 1981 mbs = B->rmap->n / bs; 1982 Mbs = B->rmap->N / bs; 1983 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1984 1985 B->rmap->bs = bs; 1986 b->bs2 = bs * bs; 1987 b->mbs = mbs; 1988 b->Mbs = Mbs; 1989 b->nbs = B->cmap->n / bs; 1990 b->Nbs = B->cmap->N / bs; 1991 1992 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1993 b->rstartbs = B->rmap->rstart / bs; 1994 b->rendbs = B->rmap->rend / bs; 1995 1996 b->cstartbs = B->cmap->rstart / bs; 1997 b->cendbs = B->cmap->rend / bs; 1998 1999 #if defined(PETSC_USE_CTABLE) 2000 PetscCall(PetscHMapIDestroy(&b->colmap)); 2001 #else 2002 PetscCall(PetscFree(b->colmap)); 2003 #endif 2004 PetscCall(PetscFree(b->garray)); 2005 PetscCall(VecDestroy(&b->lvec)); 2006 PetscCall(VecScatterDestroy(&b->Mvctx)); 2007 PetscCall(VecDestroy(&b->slvec0)); 2008 PetscCall(VecDestroy(&b->slvec0b)); 2009 PetscCall(VecDestroy(&b->slvec1)); 2010 PetscCall(VecDestroy(&b->slvec1a)); 2011 PetscCall(VecDestroy(&b->slvec1b)); 2012 PetscCall(VecScatterDestroy(&b->sMvctx)); 2013 2014 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2015 PetscCall(MatDestroy(&b->B)); 2016 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2017 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2018 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2019 2020 PetscCall(MatDestroy(&b->A)); 2021 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2022 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2023 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 2024 2025 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2026 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2027 2028 B->preallocated = PETSC_TRUE; 2029 B->was_assembled = PETSC_FALSE; 2030 B->assembled = PETSC_FALSE; 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2035 { 2036 PetscInt m, rstart, cend; 2037 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2038 const PetscInt *JJ = NULL; 2039 PetscScalar *values = NULL; 2040 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 2041 PetscBool nooffprocentries; 2042 2043 PetscFunctionBegin; 2044 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 2045 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2046 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2047 PetscCall(PetscLayoutSetUp(B->rmap)); 2048 PetscCall(PetscLayoutSetUp(B->cmap)); 2049 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2050 m = B->rmap->n / bs; 2051 rstart = B->rmap->rstart / bs; 2052 cend = B->cmap->rend / bs; 2053 2054 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2055 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2056 for (i = 0; i < m; i++) { 2057 nz = ii[i + 1] - ii[i]; 2058 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2059 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2060 JJ = jj + ii[i]; 2061 bd = 0; 2062 for (j = 0; j < nz; j++) { 2063 if (*JJ >= i + rstart) break; 2064 JJ++; 2065 bd++; 2066 } 2067 d = 0; 2068 for (; j < nz; j++) { 2069 if (*JJ++ >= cend) break; 2070 d++; 2071 } 2072 d_nnz[i] = d; 2073 o_nnz[i] = nz - d - bd; 2074 nz = nz - bd; 2075 nz_max = PetscMax(nz_max, nz); 2076 } 2077 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2078 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2079 PetscCall(PetscFree2(d_nnz, o_nnz)); 2080 2081 values = (PetscScalar *)V; 2082 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2083 for (i = 0; i < m; i++) { 2084 PetscInt row = i + rstart; 2085 PetscInt ncols = ii[i + 1] - ii[i]; 2086 const PetscInt *icols = jj + ii[i]; 2087 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2088 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2089 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2090 } else { /* block ordering does not match so we can only insert one block at a time. */ 2091 PetscInt j; 2092 for (j = 0; j < ncols; j++) { 2093 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2094 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2095 } 2096 } 2097 } 2098 2099 if (!V) PetscCall(PetscFree(values)); 2100 nooffprocentries = B->nooffprocentries; 2101 B->nooffprocentries = PETSC_TRUE; 2102 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2103 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2104 B->nooffprocentries = nooffprocentries; 2105 2106 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2107 PetscFunctionReturn(PETSC_SUCCESS); 2108 } 2109 2110 /*MC 2111 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2112 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2113 the matrix is stored. 2114 2115 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2116 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2117 2118 Options Database Key: 2119 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2120 2121 Level: beginner 2122 2123 Note: 2124 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2125 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2126 2127 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2128 M*/ 2129 2130 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2131 { 2132 Mat_MPISBAIJ *b; 2133 PetscBool flg = PETSC_FALSE; 2134 2135 PetscFunctionBegin; 2136 PetscCall(PetscNew(&b)); 2137 B->data = (void *)b; 2138 B->ops[0] = MatOps_Values; 2139 2140 B->ops->destroy = MatDestroy_MPISBAIJ; 2141 B->ops->view = MatView_MPISBAIJ; 2142 B->assembled = PETSC_FALSE; 2143 B->insertmode = NOT_SET_VALUES; 2144 2145 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2146 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2147 2148 /* build local table of row and column ownerships */ 2149 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2150 2151 /* build cache for off array entries formed */ 2152 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2153 2154 b->donotstash = PETSC_FALSE; 2155 b->colmap = NULL; 2156 b->garray = NULL; 2157 b->roworiented = PETSC_TRUE; 2158 2159 /* stuff used in block assembly */ 2160 b->barray = NULL; 2161 2162 /* stuff used for matrix vector multiply */ 2163 b->lvec = NULL; 2164 b->Mvctx = NULL; 2165 b->slvec0 = NULL; 2166 b->slvec0b = NULL; 2167 b->slvec1 = NULL; 2168 b->slvec1a = NULL; 2169 b->slvec1b = NULL; 2170 b->sMvctx = NULL; 2171 2172 /* stuff for MatGetRow() */ 2173 b->rowindices = NULL; 2174 b->rowvalues = NULL; 2175 b->getrowactive = PETSC_FALSE; 2176 2177 /* hash table stuff */ 2178 b->ht = NULL; 2179 b->hd = NULL; 2180 b->ht_size = 0; 2181 b->ht_flag = PETSC_FALSE; 2182 b->ht_fact = 0; 2183 b->ht_total_ct = 0; 2184 b->ht_insert_ct = 0; 2185 2186 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2187 b->ijonly = PETSC_FALSE; 2188 2189 b->in_loc = NULL; 2190 b->v_loc = NULL; 2191 b->n_loc = 0; 2192 2193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2197 #if defined(PETSC_HAVE_ELEMENTAL) 2198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2199 #endif 2200 #if defined(PETSC_HAVE_SCALAPACK) 2201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2202 #endif 2203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2205 2206 B->symmetric = PETSC_BOOL3_TRUE; 2207 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2208 B->symmetry_eternal = PETSC_TRUE; 2209 B->structural_symmetry_eternal = PETSC_TRUE; 2210 #if defined(PETSC_USE_COMPLEX) 2211 B->hermitian = PETSC_BOOL3_FALSE; 2212 #else 2213 B->hermitian = PETSC_BOOL3_TRUE; 2214 #endif 2215 2216 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2217 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2218 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2219 if (flg) { 2220 PetscReal fact = 1.39; 2221 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2222 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2223 if (fact <= 1.0) fact = 1.39; 2224 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2225 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2226 } 2227 PetscOptionsEnd(); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2232 /*MC 2233 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2234 2235 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2236 and `MATMPISBAIJ` otherwise. 2237 2238 Options Database Key: 2239 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2240 2241 Level: beginner 2242 2243 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2244 M*/ 2245 2246 /*@C 2247 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2248 the user should preallocate the matrix storage by setting the parameters 2249 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2250 performance can be increased by more than a factor of 50. 2251 2252 Collective 2253 2254 Input Parameters: 2255 + B - the matrix 2256 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2257 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2258 . d_nz - number of block nonzeros per block row in diagonal portion of local 2259 submatrix (same for all local rows) 2260 . d_nnz - array containing the number of block nonzeros in the various block rows 2261 in the upper triangular and diagonal part of the in diagonal portion of the local 2262 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2263 for the diagonal entry and set a value even if it is zero. 2264 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2265 submatrix (same for all local rows). 2266 - o_nnz - array containing the number of nonzeros in the various block rows of the 2267 off-diagonal portion of the local submatrix that is right of the diagonal 2268 (possibly different for each block row) or `NULL`. 2269 2270 Options Database Keys: 2271 + -mat_no_unroll - uses code that does not unroll the loops in the 2272 block calculations (much slower) 2273 - -mat_block_size - size of the blocks to use 2274 2275 Level: intermediate 2276 2277 Notes: 2278 2279 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2280 than it must be used on all processors that share the object for that argument. 2281 2282 If the *_nnz parameter is given then the *_nz parameter is ignored 2283 2284 Storage Information: 2285 For a square global matrix we define each processor's diagonal portion 2286 to be its local rows and the corresponding columns (a square submatrix); 2287 each processor's off-diagonal portion encompasses the remainder of the 2288 local matrix (a rectangular submatrix). 2289 2290 The user can specify preallocated storage for the diagonal part of 2291 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2292 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2293 memory allocation. Likewise, specify preallocated storage for the 2294 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2295 2296 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2297 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2298 You can also run with the option `-info` and look for messages with the string 2299 malloc in them to see if additional memory allocation was needed. 2300 2301 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2302 the figure below we depict these three local rows and all columns (0-11). 2303 2304 .vb 2305 0 1 2 3 4 5 6 7 8 9 10 11 2306 -------------------------- 2307 row 3 |. . . d d d o o o o o o 2308 row 4 |. . . d d d o o o o o o 2309 row 5 |. . . d d d o o o o o o 2310 -------------------------- 2311 .ve 2312 2313 Thus, any entries in the d locations are stored in the d (diagonal) 2314 submatrix, and any entries in the o locations are stored in the 2315 o (off-diagonal) submatrix. Note that the d matrix is stored in 2316 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2317 2318 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2319 plus the diagonal part of the d matrix, 2320 and `o_nz` should indicate the number of block nonzeros per row in the o matrix 2321 2322 In general, for PDE problems in which most nonzeros are near the diagonal, 2323 one expects `d_nz` >> `o_nz`. 2324 2325 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2326 @*/ 2327 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2328 { 2329 PetscFunctionBegin; 2330 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2331 PetscValidType(B, 1); 2332 PetscValidLogicalCollectiveInt(B, bs, 2); 2333 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2334 PetscFunctionReturn(PETSC_SUCCESS); 2335 } 2336 2337 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2338 /*@C 2339 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2340 (block compressed row). For good matrix assembly performance 2341 the user should preallocate the matrix storage by setting the parameters 2342 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2343 2344 Collective 2345 2346 Input Parameters: 2347 + comm - MPI communicator 2348 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2349 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2350 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2351 This value should be the same as the local size used in creating the 2352 y vector for the matrix-vector product y = Ax. 2353 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2354 This value should be the same as the local size used in creating the 2355 x vector for the matrix-vector product y = Ax. 2356 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2357 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2358 . d_nz - number of block nonzeros per block row in diagonal portion of local 2359 submatrix (same for all local rows) 2360 . d_nnz - array containing the number of block nonzeros in the various block rows 2361 in the upper triangular portion of the in diagonal portion of the local 2362 (possibly different for each block block row) or `NULL`. 2363 If you plan to factor the matrix you must leave room for the diagonal entry and 2364 set its value even if it is zero. 2365 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2366 submatrix (same for all local rows). 2367 - o_nnz - array containing the number of nonzeros in the various block rows of the 2368 off-diagonal portion of the local submatrix (possibly different for 2369 each block row) or `NULL`. 2370 2371 Output Parameter: 2372 . A - the matrix 2373 2374 Options Database Keys: 2375 + -mat_no_unroll - uses code that does not unroll the loops in the 2376 block calculations (much slower) 2377 . -mat_block_size - size of the blocks to use 2378 - -mat_mpi - use the parallel matrix data structures even on one processor 2379 (defaults to using SeqBAIJ format on one processor) 2380 2381 Level: intermediate 2382 2383 Notes: 2384 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2385 MatXXXXSetPreallocation() paradigm instead of this routine directly. 2386 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2387 2388 The number of rows and columns must be divisible by blocksize. 2389 This matrix type does not support complex Hermitian operation. 2390 2391 The user MUST specify either the local or global matrix dimensions 2392 (possibly both). 2393 2394 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2395 than it must be used on all processors that share the object for that argument. 2396 2397 If the *_nnz parameter is given then the *_nz parameter is ignored 2398 2399 Storage Information: 2400 For a square global matrix we define each processor's diagonal portion 2401 to be its local rows and the corresponding columns (a square submatrix); 2402 each processor's off-diagonal portion encompasses the remainder of the 2403 local matrix (a rectangular submatrix). 2404 2405 The user can specify preallocated storage for the diagonal part of 2406 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2407 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2408 memory allocation. Likewise, specify preallocated storage for the 2409 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2410 2411 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2412 the figure below we depict these three local rows and all columns (0-11). 2413 2414 .vb 2415 0 1 2 3 4 5 6 7 8 9 10 11 2416 -------------------------- 2417 row 3 |. . . d d d o o o o o o 2418 row 4 |. . . d d d o o o o o o 2419 row 5 |. . . d d d o o o o o o 2420 -------------------------- 2421 .ve 2422 2423 Thus, any entries in the d locations are stored in the d (diagonal) 2424 submatrix, and any entries in the o locations are stored in the 2425 o (off-diagonal) submatrix. Note that the d matrix is stored in 2426 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2427 2428 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2429 plus the diagonal part of the d matrix, 2430 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2431 In general, for PDE problems in which most nonzeros are near the diagonal, 2432 one expects `d_nz` >> `o_nz`. 2433 2434 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2435 @*/ 2436 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2437 { 2438 PetscMPIInt size; 2439 2440 PetscFunctionBegin; 2441 PetscCall(MatCreate(comm, A)); 2442 PetscCall(MatSetSizes(*A, m, n, M, N)); 2443 PetscCallMPI(MPI_Comm_size(comm, &size)); 2444 if (size > 1) { 2445 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2446 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2447 } else { 2448 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2449 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2450 } 2451 PetscFunctionReturn(PETSC_SUCCESS); 2452 } 2453 2454 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2455 { 2456 Mat mat; 2457 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2458 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2459 PetscScalar *array; 2460 2461 PetscFunctionBegin; 2462 *newmat = NULL; 2463 2464 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2465 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2466 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2467 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2468 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2469 2470 mat->factortype = matin->factortype; 2471 mat->preallocated = PETSC_TRUE; 2472 mat->assembled = PETSC_TRUE; 2473 mat->insertmode = NOT_SET_VALUES; 2474 2475 a = (Mat_MPISBAIJ *)mat->data; 2476 a->bs2 = oldmat->bs2; 2477 a->mbs = oldmat->mbs; 2478 a->nbs = oldmat->nbs; 2479 a->Mbs = oldmat->Mbs; 2480 a->Nbs = oldmat->Nbs; 2481 2482 a->size = oldmat->size; 2483 a->rank = oldmat->rank; 2484 a->donotstash = oldmat->donotstash; 2485 a->roworiented = oldmat->roworiented; 2486 a->rowindices = NULL; 2487 a->rowvalues = NULL; 2488 a->getrowactive = PETSC_FALSE; 2489 a->barray = NULL; 2490 a->rstartbs = oldmat->rstartbs; 2491 a->rendbs = oldmat->rendbs; 2492 a->cstartbs = oldmat->cstartbs; 2493 a->cendbs = oldmat->cendbs; 2494 2495 /* hash table stuff */ 2496 a->ht = NULL; 2497 a->hd = NULL; 2498 a->ht_size = 0; 2499 a->ht_flag = oldmat->ht_flag; 2500 a->ht_fact = oldmat->ht_fact; 2501 a->ht_total_ct = 0; 2502 a->ht_insert_ct = 0; 2503 2504 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2505 if (oldmat->colmap) { 2506 #if defined(PETSC_USE_CTABLE) 2507 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2508 #else 2509 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2510 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2511 #endif 2512 } else a->colmap = NULL; 2513 2514 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 2515 PetscCall(PetscMalloc1(len, &a->garray)); 2516 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2517 } else a->garray = NULL; 2518 2519 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2520 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2521 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2522 2523 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2524 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2525 2526 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2527 PetscCall(VecGetArray(a->slvec1, &array)); 2528 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2529 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2530 PetscCall(VecRestoreArray(a->slvec1, &array)); 2531 PetscCall(VecGetArray(a->slvec0, &array)); 2532 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2533 PetscCall(VecRestoreArray(a->slvec0, &array)); 2534 2535 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2536 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2537 a->sMvctx = oldmat->sMvctx; 2538 2539 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2540 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2541 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2542 *newmat = mat; 2543 PetscFunctionReturn(PETSC_SUCCESS); 2544 } 2545 2546 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2547 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2548 2549 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2550 { 2551 PetscBool isbinary; 2552 2553 PetscFunctionBegin; 2554 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2555 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2556 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2557 PetscFunctionReturn(PETSC_SUCCESS); 2558 } 2559 2560 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2561 { 2562 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2563 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2564 PetscReal atmp; 2565 PetscReal *work, *svalues, *rvalues; 2566 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2567 PetscMPIInt rank, size; 2568 PetscInt *rowners_bs, dest, count, source; 2569 PetscScalar *va; 2570 MatScalar *ba; 2571 MPI_Status stat; 2572 2573 PetscFunctionBegin; 2574 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2575 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2576 PetscCall(VecGetArray(v, &va)); 2577 2578 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2579 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2580 2581 bs = A->rmap->bs; 2582 mbs = a->mbs; 2583 Mbs = a->Mbs; 2584 ba = b->a; 2585 bi = b->i; 2586 bj = b->j; 2587 2588 /* find ownerships */ 2589 rowners_bs = A->rmap->range; 2590 2591 /* each proc creates an array to be distributed */ 2592 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2593 2594 /* row_max for B */ 2595 if (rank != size - 1) { 2596 for (i = 0; i < mbs; i++) { 2597 ncols = bi[1] - bi[0]; 2598 bi++; 2599 brow = bs * i; 2600 for (j = 0; j < ncols; j++) { 2601 bcol = bs * (*bj); 2602 for (kcol = 0; kcol < bs; kcol++) { 2603 col = bcol + kcol; /* local col index */ 2604 col += rowners_bs[rank + 1]; /* global col index */ 2605 for (krow = 0; krow < bs; krow++) { 2606 atmp = PetscAbsScalar(*ba); 2607 ba++; 2608 row = brow + krow; /* local row index */ 2609 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2610 if (work[col] < atmp) work[col] = atmp; 2611 } 2612 } 2613 bj++; 2614 } 2615 } 2616 2617 /* send values to its owners */ 2618 for (dest = rank + 1; dest < size; dest++) { 2619 svalues = work + rowners_bs[dest]; 2620 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2621 PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2622 } 2623 } 2624 2625 /* receive values */ 2626 if (rank) { 2627 rvalues = work; 2628 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2629 for (source = 0; source < rank; source++) { 2630 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2631 /* process values */ 2632 for (i = 0; i < count; i++) { 2633 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2634 } 2635 } 2636 } 2637 2638 PetscCall(VecRestoreArray(v, &va)); 2639 PetscCall(PetscFree(work)); 2640 PetscFunctionReturn(PETSC_SUCCESS); 2641 } 2642 2643 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2644 { 2645 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2646 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2647 PetscScalar *x, *ptr, *from; 2648 Vec bb1; 2649 const PetscScalar *b; 2650 2651 PetscFunctionBegin; 2652 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2653 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2654 2655 if (flag == SOR_APPLY_UPPER) { 2656 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2661 if (flag & SOR_ZERO_INITIAL_GUESS) { 2662 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2663 its--; 2664 } 2665 2666 PetscCall(VecDuplicate(bb, &bb1)); 2667 while (its--) { 2668 /* lower triangular part: slvec0b = - B^T*xx */ 2669 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2670 2671 /* copy xx into slvec0a */ 2672 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2673 PetscCall(VecGetArray(xx, &x)); 2674 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2675 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2676 2677 PetscCall(VecScale(mat->slvec0, -1.0)); 2678 2679 /* copy bb into slvec1a */ 2680 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2681 PetscCall(VecGetArrayRead(bb, &b)); 2682 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2683 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2684 2685 /* set slvec1b = 0 */ 2686 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2687 PetscCall(VecZeroEntries(mat->slvec1b)); 2688 2689 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2690 PetscCall(VecRestoreArray(xx, &x)); 2691 PetscCall(VecRestoreArrayRead(bb, &b)); 2692 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2693 2694 /* upper triangular part: bb1 = bb1 - B*x */ 2695 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2696 2697 /* local diagonal sweep */ 2698 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2699 } 2700 PetscCall(VecDestroy(&bb1)); 2701 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2702 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2703 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2704 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2705 } else if (flag & SOR_EISENSTAT) { 2706 Vec xx1; 2707 PetscBool hasop; 2708 const PetscScalar *diag; 2709 PetscScalar *sl, scale = (omega - 2.0) / omega; 2710 PetscInt i, n; 2711 2712 if (!mat->xx1) { 2713 PetscCall(VecDuplicate(bb, &mat->xx1)); 2714 PetscCall(VecDuplicate(bb, &mat->bb1)); 2715 } 2716 xx1 = mat->xx1; 2717 bb1 = mat->bb1; 2718 2719 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2720 2721 if (!mat->diag) { 2722 /* this is wrong for same matrix with new nonzero values */ 2723 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2724 PetscCall(MatGetDiagonal(matin, mat->diag)); 2725 } 2726 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2727 2728 if (hasop) { 2729 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2730 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2731 } else { 2732 /* 2733 These two lines are replaced by code that may be a bit faster for a good compiler 2734 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2735 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2736 */ 2737 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2738 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2739 PetscCall(VecGetArrayRead(bb, &b)); 2740 PetscCall(VecGetArray(xx, &x)); 2741 PetscCall(VecGetLocalSize(xx, &n)); 2742 if (omega == 1.0) { 2743 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2744 PetscCall(PetscLogFlops(2.0 * n)); 2745 } else { 2746 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2747 PetscCall(PetscLogFlops(3.0 * n)); 2748 } 2749 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2750 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2751 PetscCall(VecRestoreArrayRead(bb, &b)); 2752 PetscCall(VecRestoreArray(xx, &x)); 2753 } 2754 2755 /* multiply off-diagonal portion of matrix */ 2756 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2757 PetscCall(VecZeroEntries(mat->slvec1b)); 2758 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2759 PetscCall(VecGetArray(mat->slvec0, &from)); 2760 PetscCall(VecGetArray(xx, &x)); 2761 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2762 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2763 PetscCall(VecRestoreArray(xx, &x)); 2764 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2765 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2766 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2767 2768 /* local sweep */ 2769 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2770 PetscCall(VecAXPY(xx, 1.0, xx1)); 2771 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2772 PetscFunctionReturn(PETSC_SUCCESS); 2773 } 2774 2775 /*@ 2776 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2777 CSR format the local rows. 2778 2779 Collective 2780 2781 Input Parameters: 2782 + comm - MPI communicator 2783 . bs - the block size, only a block size of 1 is supported 2784 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2785 . n - This value should be the same as the local size used in creating the 2786 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2787 calculated if `N` is given) For square matrices `n` is almost always `m`. 2788 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2789 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2790 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2791 . j - column indices 2792 - a - matrix values 2793 2794 Output Parameter: 2795 . mat - the matrix 2796 2797 Level: intermediate 2798 2799 Notes: 2800 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2801 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2802 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2803 2804 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2805 2806 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2807 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 2808 @*/ 2809 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2810 { 2811 PetscFunctionBegin; 2812 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2813 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2814 PetscCall(MatCreate(comm, mat)); 2815 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2816 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2817 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2818 PetscFunctionReturn(PETSC_SUCCESS); 2819 } 2820 2821 /*@C 2822 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2823 2824 Collective 2825 2826 Input Parameters: 2827 + B - the matrix 2828 . bs - the block size 2829 . i - the indices into `j` for the start of each local row (starts with zero) 2830 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2831 - v - optional values in the matrix 2832 2833 Level: advanced 2834 2835 Notes: 2836 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2837 and usually the numerical values as well 2838 2839 Any entries below the diagonal are ignored 2840 2841 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ` 2842 @*/ 2843 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2844 { 2845 PetscFunctionBegin; 2846 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2847 PetscFunctionReturn(PETSC_SUCCESS); 2848 } 2849 2850 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2851 { 2852 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2853 PetscInt *indx; 2854 PetscScalar *values; 2855 2856 PetscFunctionBegin; 2857 PetscCall(MatGetSize(inmat, &m, &N)); 2858 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2859 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2860 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2861 PetscInt *bindx, rmax = a->rmax, j; 2862 PetscMPIInt rank, size; 2863 2864 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2865 mbs = m / bs; 2866 Nbs = N / cbs; 2867 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2868 nbs = n / cbs; 2869 2870 PetscCall(PetscMalloc1(rmax, &bindx)); 2871 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2872 2873 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2874 PetscCallMPI(MPI_Comm_rank(comm, &size)); 2875 if (rank == size - 1) { 2876 /* Check sum(nbs) = Nbs */ 2877 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2878 } 2879 2880 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2881 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2882 for (i = 0; i < mbs; i++) { 2883 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2884 nnz = nnz / bs; 2885 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2886 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2887 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2888 } 2889 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2890 PetscCall(PetscFree(bindx)); 2891 2892 PetscCall(MatCreate(comm, outmat)); 2893 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2894 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2895 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2896 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2897 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2898 MatPreallocateEnd(dnz, onz); 2899 } 2900 2901 /* numeric phase */ 2902 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2903 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2904 2905 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2906 for (i = 0; i < m; i++) { 2907 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2908 Ii = i + rstart; 2909 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2910 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2911 } 2912 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2913 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2914 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2915 PetscFunctionReturn(PETSC_SUCCESS); 2916 } 2917