#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

static PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));
  PetscCall(VecScatterDestroy(&baij->sMvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree(baij->hd));
  PetscCall(VecDestroy(&baij->diag));
  PetscCall(VecDestroy(&baij->bb1));
  PetscCall(VecDestroy(&baij->xx1));
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(baij->setvaluescopy));
#endif
  PetscCall(PetscFree(baij->in_loc));
  PetscCall(PetscFree(baij->v_loc));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
#define TYPE SBAIJ
#define TYPE_SBAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_SBAIJ

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif

/* This could be moved to matimpl.h */
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat       preallocator;
  PetscInt  r, rstart, rend;
  PetscInt  bs, i, m, n, M, N;
  PetscBool cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidLogicalCollectiveInt(B, nm, 2);
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
    PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B, fill, 5);
  PetscCall(MatGetBlockSize(B, &bs));
  PetscCall(MatGetSize(B, &M, &N));
  PetscCall(MatGetLocalSize(B, &m, &n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
  PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator, bs));
  PetscCall(MatSetSizes(preallocator, m, n, M, N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
  for (r = rstart; r < rend; ++r) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
      PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
      if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
      PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat      B;
  PetscInt r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE, isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, newtype));
    PetscCall(MatGetBlockSize(A, &bs));
    PetscCall(MatSetBlockSize(B, bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt           ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
    PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian == PETSC_BOOL3_TRUE) {
      PetscInt i;
      for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
    } else {
      PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
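  /* the mirrored (col, r) entries set above may fall in off-process rows of B; the assembly started here moves them to their owning processes */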
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored or generates an error
*/
static PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  MatScalar value;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macros */
  Mat A = baij->A;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar *aa = a->a;

  Mat B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar *ba = b->a;

  PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  /* for stash */
  PetscInt n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      PetscCall(PetscFree(baij->in_loc));
      PetscCall(PetscFree(baij->v_loc));
      PetscCall(PetscMalloc1(n, &baij->in_loc));
      PetscCall(PetscMalloc1(n, &baij->v_loc));

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;                     /* local row index */
      for (j = 0; j < n; j++) {
        if (im[i] / bs > in[j] / bs) {
          PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
          col  = in[j] - cstart_orig;                    /* local col index */
          brow = row / bs;
          bcol = col / bs;
          if (brow > bcol) continue; /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else col += in[j] % bs;
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off processor entry */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j = 0; j < n; j++) {
          if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i * n + j];
          } else {
            v_loc[n_loc] = v[j * m + i];
          }
          n_loc++;
        }
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  if (col < row) {
    PetscCheck(a->ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
    PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
  }
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = 0; jj < bs; jj++) *bap++ = *value++;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine is exactly duplicated in mpibaij.c
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  low   = 0;
  high  = nrow;
  value = v;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine could be optimized by removing the need for the block copy below and passing stride information
   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
*/
static PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar *barray = baij->barray;
  PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
  PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n - 1) * bs;
  } else {
    stepval = (m - 1) * bs;
  }
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        if (im[i] > in[j]) {
          PetscCheck(ignore_ltriangular, PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          continue; /* ignore lower triangular blocks */
        }
        /* If NumCol = 1 then a copy is not required */
        if (roworiented && n == 1) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPISBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscReal sum[2], *lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2, &lnorm2));
      PetscCall(MatNorm(baij->A, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2++; /* squared norm of A */
      PetscCall(MatNorm(baij->B, type, lnorm2));
      *lnorm2 = (*lnorm2) * (*lnorm2);
      lnorm2--; /* squared norm of B */
      PetscCallMPI(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
      Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
      PetscReal *rsum, vabs;
      PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
      PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
      MatScalar *v;

      PetscCall(PetscCalloc1(mat->cmap->N, &rsum));
      /* Amat */
      v  = amat->a;
      jj = amat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = amat->i[brow + 1] - amat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * (rstart + *jj);
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      /* Bmat */
      v  = bmat->a;
      jj = bmat->j;
      for (brow = 0; brow < mbs; brow++) {
        grow = bs * (rstart + brow);
        nz   = bmat->i[brow + 1] - bmat->i[brow];
        for (bcol = 0; bcol < nz; bcol++) {
          gcol = bs * garray[*jj];
          jj++;
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              vabs = PetscAbsScalar(*v);
              v++;
              rsum[gcol + col] += vabs;
              rsum[grow + row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz * bs * bs));
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, rsum, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col = 0; col < mat->cmap->N; col++) {
        if (rsum[col] > *norm) *norm = rsum[col];
      }
      PetscCall(PetscFree(rsum));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
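
  /* Two independent stashes are flushed here: mat->stash holds entries that were set point-wise into
     off-process rows (scattered with the point-wise ownership ranges), while mat->bstash holds entries
     from MatSetValuesBlocked() (scattered with the block ownership ranges in baij->rangebs). */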
  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
  PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt *row, *col;
  PetscBool all_assembled;
  PetscMPIInt n;
  PetscBool r1, r2, r3;
  MatScalar *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       switch the matrices to column-oriented insertion, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
#include <petscdraw.h>
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs;
  PetscMPIInt rank = baij->rank;
  PetscBool isascii, isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

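  /* For the remaining cases the whole matrix is gathered onto rank 0 below as a temporary MATMPISBAIJ
     whose rows all live on process 0 and is then viewed with the sequential viewer; simple, but
     memory-intensive for large matrices. */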
  {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar *a;
    const char *matname;

    /* Should this be the same type as mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPISBAIJ));
    PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Bloc->i;
    aj   = Bloc->j;
    a    = Bloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
#define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary

static PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if defined(PETSC_USE_COMPLEX)
static PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  /* since a->slvec1b shares memory (dangerously) with a->slvec1, changes to a->slvec1 will affect it */
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscDefined(USE_COMPLEX)
static PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
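  /* slvec0 is a packed work vector laid out as [local part of x ; B^H x]: its first bs*mbs entries are
     filled with x below, while slvec0b (which shares the tail of slvec0's storage, analogous to
     slvec1b/slvec1 noted above) already holds the product computed above; the scatter then
     accumulates both pieces into slvec1 = [slvec1a ; slvec1b]. */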
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif

static PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  PetscInt mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
  PetscCall(VecZeroEntries(a->slvec1b));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
    Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }

  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  lrow = row - brstart; /* local row index */

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;

    PetscCall(MatConjugate(a->A));
    PetscCall(MatConjugate(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
   Input: isrow       - distributed (parallel),
          iscol_local - locally owned (seq)
*/
static PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
{
  PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
  const PetscInt *ptr1, *ptr2;

  PetscFunctionBegin;
  *flg = PETSC_FALSE;
  PetscCall(ISGetLocalSize(isrow, &sz1));
  PetscCall(ISGetLocalSize(iscol_local, &sz2));
  if (sz1 > sz2) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(ISGetIndices(isrow, &ptr1));
  PetscCall(ISGetIndices(iscol_local, &ptr2));

  PetscCall(PetscMalloc1(sz1, &a1));
  PetscCall(PetscMalloc1(sz2, &a2));
  PetscCall(PetscArraycpy(a1, ptr1, sz1));
  PetscCall(PetscArraycpy(a2, ptr2, sz2));
  PetscCall(PetscSortInt(sz1, a1));
  PetscCall(PetscSortInt(sz2, a2));

  nmatch = 0;
  k      = 0;
  for (i = 0; i < sz1; i++) {
    for (j = k; j < sz2; j++) {
      if (a1[i] == a2[j]) {
        k = j;
        nmatch++;
        break;
      }
    }
  }
  PetscCall(ISRestoreIndices(isrow, &ptr1));
  PetscCall(ISRestoreIndices(iscol_local, &ptr2));
  PetscCall(PetscFree(a1));
  PetscCall(PetscFree(a2));
  if (nmatch < sz1) {
    *flg = PETSC_FALSE;
  } else {
    *flg = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  Mat C[2];
  IS iscol_local, isrow_local;
  PetscInt csize, csize_local, rsize;
  PetscBool isequal, issorted, isidentity = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(ISGetLocalSize(isrow, &rsize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSorted(iscol_local, &issorted));
    PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must be sorted");
  }
  PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
  if (!isequal) {
    PetscCall(ISGetLocalSize(iscol_local, &csize_local));
    isidentity = (PetscBool)(mat->cmap->N == csize_local);
    if (!isidentity) {
      if (call == MAT_REUSE_MATRIX) {
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather_other", (PetscObject *)&isrow_local));
        PetscCheck(isrow_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
      } else {
        PetscCall(ISAllGather(isrow, &isrow_local));
        PetscCall(ISSorted(isrow_local, &issorted));
        PetscCheck(issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, isrow must be sorted");
      }
    }
  }
  /* now call MatCreateSubMatrix_MPIBAIJ() */
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, isequal || isidentity ? call : MAT_INITIAL_MATRIX, isequal || isidentity ? newmat : C, (PetscBool)(isequal || isidentity)));
  if (!isequal && !isidentity) {
    if (call == MAT_INITIAL_MATRIX) {
      IS intersect;
      PetscInt ni;

      PetscCall(ISIntersect(isrow_local, iscol_local, &intersect));
      PetscCall(ISGetLocalSize(intersect, &ni));
      PetscCall(ISDestroy(&intersect));
      PetscCheck(ni == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create such a submatrix: for symmetric format, when requesting an off-diagonal submatrix, isrow and iscol should have an empty intersection (number of common indices is %" PetscInt_FMT ")", ni);
    }
    PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, iscol, isrow_local, rsize, MAT_INITIAL_MATRIX, C + 1, PETSC_FALSE));
    PetscCall(MatTranspose(C[1], MAT_INPLACE_MATRIX, C + 1));
    PetscCall(MatAXPY(C[0], 1.0, C[1], DIFFERENT_NONZERO_PATTERN));
    if (call == MAT_REUSE_MATRIX) PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    else if (mat->rmap->bs == 1) PetscCall(MatConvert(C[0], MATAIJ, MAT_INITIAL_MATRIX, newmat));
    else PetscCall(MatCopy(C[0], *newmat, SAME_NONZERO_PATTERN));
    PetscCall(MatDestroy(C));
    PetscCall(MatDestroy(C + 1));
  }
  if (call == MAT_INITIAL_MATRIX) {
    if (!isequal && !isidentity) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather_other", (PetscObject)isrow_local));
      PetscCall(ISDestroy(&isrow_local));
    }
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data;
  Mat A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
{
  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
  Mat a = baij->A, b = baij->B;
  PetscInt nv, m, n;

  PetscFunctionBegin;
  if (!ll) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);

  PetscCall(VecGetLocalSize(rr, &nv));
  PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");

  PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));

  /* left diagonalscale the off-diagonal part */
  PetscUseTypeMethod(b, diagonalscale, ll, NULL);

  /* scale the diagonal part */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  /* right diagonalscale the off-diagonal part */
  PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);

static PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
  Mat a, b, c, d;
  PetscBool flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
{
  PetscBool isbaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
  PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
*/ 1639 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1640 PetscCall(MatGetRowUpperTriangular(A)); 1641 PetscCall(MatCopy_Basic(A, B, str)); 1642 PetscCall(MatRestoreRowUpperTriangular(A)); 1643 } else { 1644 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1645 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1646 1647 PetscCall(MatCopy(a->A, b->A, str)); 1648 PetscCall(MatCopy(a->B, b->B, str)); 1649 } 1650 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1651 PetscFunctionReturn(PETSC_SUCCESS); 1652 } 1653 1654 static PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1655 { 1656 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1657 PetscBLASInt bnz, one = 1; 1658 Mat_SeqSBAIJ *xa, *ya; 1659 Mat_SeqBAIJ *xb, *yb; 1660 1661 PetscFunctionBegin; 1662 if (str == SAME_NONZERO_PATTERN) { 1663 PetscScalar alpha = a; 1664 xa = (Mat_SeqSBAIJ *)xx->A->data; 1665 ya = (Mat_SeqSBAIJ *)yy->A->data; 1666 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1667 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1668 xb = (Mat_SeqBAIJ *)xx->B->data; 1669 yb = (Mat_SeqBAIJ *)yy->B->data; 1670 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1671 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1672 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1673 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1674 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1675 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1676 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1677 } else { 1678 Mat B; 1679 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1680 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1681 PetscCall(MatGetRowUpperTriangular(X)); 1682 PetscCall(MatGetRowUpperTriangular(Y)); 1683 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1684 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1685 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1686 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1687 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1688 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1689 PetscCall(MatSetType(B, MATMPISBAIJ)); 1690 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1691 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1692 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1693 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1694 PetscCall(MatHeaderMerge(Y, &B)); 1695 PetscCall(PetscFree(nnz_d)); 1696 PetscCall(PetscFree(nnz_o)); 1697 PetscCall(MatRestoreRowUpperTriangular(X)); 1698 PetscCall(MatRestoreRowUpperTriangular(Y)); 1699 } 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 static PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1704 { 1705 PetscInt i; 1706 PetscBool flg; 1707 1708 PetscFunctionBegin; 1709 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1710 for (i = 0; i < n; i++) { 1711 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1712 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1713 } 1714 PetscFunctionReturn(PETSC_SUCCESS); 1715 } 1716 1717 static PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1718 { 1719 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 
1720 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1721 1722 PetscFunctionBegin; 1723 if (!Y->preallocated) { 1724 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1725 } else if (!aij->nz) { 1726 PetscInt nonew = aij->nonew; 1727 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1728 aij->nonew = nonew; 1729 } 1730 PetscCall(MatShift_Basic(Y, a)); 1731 PetscFunctionReturn(PETSC_SUCCESS); 1732 } 1733 1734 static PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1735 { 1736 PetscFunctionBegin; 1737 *a = ((Mat_MPISBAIJ *)A->data)->A; 1738 PetscFunctionReturn(PETSC_SUCCESS); 1739 } 1740 1741 static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep) 1742 { 1743 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1744 1745 PetscFunctionBegin; 1746 PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 1747 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 1748 PetscFunctionReturn(PETSC_SUCCESS); 1749 } 1750 1751 static PetscErrorCode MatLoad_MPISBAIJ(Mat, PetscViewer); 1752 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat, Vec, PetscInt[]); 1753 static PetscErrorCode MatSOR_MPISBAIJ(Mat, Vec, PetscReal, MatSORType, PetscReal, PetscInt, PetscInt, Vec); 1754 1755 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1756 MatGetRow_MPISBAIJ, 1757 MatRestoreRow_MPISBAIJ, 1758 MatMult_MPISBAIJ, 1759 /* 4*/ MatMultAdd_MPISBAIJ, 1760 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1761 MatMultAdd_MPISBAIJ, 1762 NULL, 1763 NULL, 1764 NULL, 1765 /* 10*/ NULL, 1766 NULL, 1767 NULL, 1768 MatSOR_MPISBAIJ, 1769 MatTranspose_MPISBAIJ, 1770 /* 15*/ MatGetInfo_MPISBAIJ, 1771 MatEqual_MPISBAIJ, 1772 MatGetDiagonal_MPISBAIJ, 1773 MatDiagonalScale_MPISBAIJ, 1774 MatNorm_MPISBAIJ, 1775 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1776 MatAssemblyEnd_MPISBAIJ, 1777 MatSetOption_MPISBAIJ, 1778 MatZeroEntries_MPISBAIJ, 1779 /* 24*/ NULL, 1780 NULL, 1781 NULL, 1782 NULL, 1783 NULL, 1784 /* 29*/ MatSetUp_MPI_Hash, 1785 NULL, 1786 NULL, 1787 MatGetDiagonalBlock_MPISBAIJ, 1788 NULL, 1789 /* 34*/ MatDuplicate_MPISBAIJ, 1790 NULL, 1791 NULL, 1792 NULL, 1793 NULL, 1794 /* 39*/ MatAXPY_MPISBAIJ, 1795 MatCreateSubMatrices_MPISBAIJ, 1796 MatIncreaseOverlap_MPISBAIJ, 1797 MatGetValues_MPISBAIJ, 1798 MatCopy_MPISBAIJ, 1799 /* 44*/ NULL, 1800 MatScale_MPISBAIJ, 1801 MatShift_MPISBAIJ, 1802 NULL, 1803 NULL, 1804 /* 49*/ NULL, 1805 NULL, 1806 NULL, 1807 NULL, 1808 NULL, 1809 /* 54*/ NULL, 1810 NULL, 1811 MatSetUnfactored_MPISBAIJ, 1812 NULL, 1813 MatSetValuesBlocked_MPISBAIJ, 1814 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1815 NULL, 1816 NULL, 1817 NULL, 1818 NULL, 1819 /* 64*/ NULL, 1820 NULL, 1821 NULL, 1822 NULL, 1823 MatGetRowMaxAbs_MPISBAIJ, 1824 /* 69*/ NULL, 1825 MatConvert_MPISBAIJ_Basic, 1826 NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 NULL, 1831 NULL, 1832 NULL, 1833 MatLoad_MPISBAIJ, 1834 /* 79*/ NULL, 1835 NULL, 1836 NULL, 1837 NULL, 1838 NULL, 1839 /* 84*/ NULL, 1840 NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 /* 89*/ NULL, 1845 NULL, 1846 NULL, 1847 NULL, 1848 MatConjugate_MPISBAIJ, 1849 /* 94*/ NULL, 1850 NULL, 1851 MatRealPart_MPISBAIJ, 1852 MatImaginaryPart_MPISBAIJ, 1853 MatGetRowUpperTriangular_MPISBAIJ, 1854 /* 99*/ MatRestoreRowUpperTriangular_MPISBAIJ, 1855 NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 /*104*/ NULL, 1860 NULL, 1861 NULL, 1862 NULL, 1863 NULL, 1864 /*109*/ NULL, 1865 NULL, 1866 NULL, 1867 NULL, 1868 NULL, 1869 /*114*/ NULL, 1870 
NULL, 1871 NULL, 1872 NULL, 1873 NULL, 1874 /*119*/ NULL, 1875 NULL, 1876 NULL, 1877 NULL, 1878 NULL, 1879 /*124*/ NULL, 1880 NULL, 1881 MatSetBlockSizes_Default, 1882 NULL, 1883 NULL, 1884 /*129*/ NULL, 1885 MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1886 NULL, 1887 NULL, 1888 NULL, 1889 /*134*/ NULL, 1890 NULL, 1891 MatEliminateZeros_MPISBAIJ, 1892 NULL, 1893 NULL, 1894 /*139*/ NULL, 1895 NULL, 1896 MatCopyHashToXAIJ_MPI_Hash, 1897 NULL, 1898 NULL}; 1899 1900 static PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1901 { 1902 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1903 PetscInt i, mbs, Mbs; 1904 PetscMPIInt size; 1905 1906 PetscFunctionBegin; 1907 if (B->hash_active) { 1908 B->ops[0] = b->cops; 1909 B->hash_active = PETSC_FALSE; 1910 } 1911 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1912 PetscCall(MatSetBlockSize(B, bs)); 1913 PetscCall(PetscLayoutSetUp(B->rmap)); 1914 PetscCall(PetscLayoutSetUp(B->cmap)); 1915 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1916 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1917 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1918 1919 mbs = B->rmap->n / bs; 1920 Mbs = B->rmap->N / bs; 1921 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1922 1923 B->rmap->bs = bs; 1924 b->bs2 = bs * bs; 1925 b->mbs = mbs; 1926 b->Mbs = Mbs; 1927 b->nbs = B->cmap->n / bs; 1928 b->Nbs = B->cmap->N / bs; 1929 1930 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1931 b->rstartbs = B->rmap->rstart / bs; 1932 b->rendbs = B->rmap->rend / bs; 1933 1934 b->cstartbs = B->cmap->rstart / bs; 1935 b->cendbs = B->cmap->rend / bs; 1936 1937 #if defined(PETSC_USE_CTABLE) 1938 PetscCall(PetscHMapIDestroy(&b->colmap)); 1939 #else 1940 PetscCall(PetscFree(b->colmap)); 1941 #endif 1942 PetscCall(PetscFree(b->garray)); 1943 PetscCall(VecDestroy(&b->lvec)); 1944 PetscCall(VecScatterDestroy(&b->Mvctx)); 1945 PetscCall(VecDestroy(&b->slvec0)); 1946 PetscCall(VecDestroy(&b->slvec0b)); 1947 PetscCall(VecDestroy(&b->slvec1)); 1948 PetscCall(VecDestroy(&b->slvec1a)); 1949 PetscCall(VecDestroy(&b->slvec1b)); 1950 PetscCall(VecScatterDestroy(&b->sMvctx)); 1951 1952 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 1953 1954 MatSeqXAIJGetOptions_Private(b->B); 1955 PetscCall(MatDestroy(&b->B)); 1956 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1957 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 1958 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1959 MatSeqXAIJRestoreOptions_Private(b->B); 1960 1961 MatSeqXAIJGetOptions_Private(b->A); 1962 PetscCall(MatDestroy(&b->A)); 1963 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1964 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1965 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1966 MatSeqXAIJRestoreOptions_Private(b->A); 1967 1968 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1969 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1970 1971 B->preallocated = PETSC_TRUE; 1972 B->was_assembled = PETSC_FALSE; 1973 B->assembled = PETSC_FALSE; 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1978 { 1979 PetscInt m, rstart, cend; 1980 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1981 const PetscInt *JJ = NULL; 1982 PetscScalar *values = NULL; 1983 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1984 PetscBool nooffprocentries; 1985 1986 PetscFunctionBegin; 1987 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1988 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1989 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1990 PetscCall(PetscLayoutSetUp(B->rmap)); 1991 PetscCall(PetscLayoutSetUp(B->cmap)); 1992 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1993 m = B->rmap->n / bs; 1994 rstart = B->rmap->rstart / bs; 1995 cend = B->cmap->rend / bs; 1996 1997 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 1998 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 1999 for (i = 0; i < m; i++) { 2000 nz = ii[i + 1] - ii[i]; 2001 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2002 /* count the ones on the diagonal and above, split into diagonal and off-diagonal portions. */ 2003 JJ = jj + ii[i]; 2004 bd = 0; 2005 for (j = 0; j < nz; j++) { 2006 if (*JJ >= i + rstart) break; 2007 JJ++; 2008 bd++; 2009 } 2010 d = 0; 2011 for (; j < nz; j++) { 2012 if (*JJ++ >= cend) break; 2013 d++; 2014 } 2015 d_nnz[i] = d; 2016 o_nnz[i] = nz - d - bd; 2017 nz = nz - bd; 2018 nz_max = PetscMax(nz_max, nz); 2019 } 2020 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2021 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2022 PetscCall(PetscFree2(d_nnz, o_nnz)); 2023 2024 values = (PetscScalar *)V; 2025 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2026 for (i = 0; i < m; i++) { 2027 PetscInt row = i + rstart; 2028 PetscInt ncols = ii[i + 1] - ii[i]; 2029 const PetscInt *icols = jj + ii[i]; 2030 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2031 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2032 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2033 } else { /* block ordering does not match so we can only insert one block at a time. */ 2034 PetscInt j; 2035 for (j = 0; j < ncols; j++) { 2036 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2037 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2038 } 2039 } 2040 } 2041 2042 if (!V) PetscCall(PetscFree(values)); 2043 nooffprocentries = B->nooffprocentries; 2044 B->nooffprocentries = PETSC_TRUE; 2045 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2046 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2047 B->nooffprocentries = nooffprocentries; 2048 2049 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2050 PetscFunctionReturn(PETSC_SUCCESS); 2051 } 2052 2053 /*MC 2054 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2055 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2056 the matrix is stored. 2057 2058 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2059 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2060 2061 Options Database Key: 2062 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2063 2064 Level: beginner 2065 2066 Note: 2067 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2068 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2069 2070 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2071 M*/ 2072 2073 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2074 { 2075 Mat_MPISBAIJ *b; 2076 PetscBool flg = PETSC_FALSE; 2077 2078 PetscFunctionBegin; 2079 PetscCall(PetscNew(&b)); 2080 B->data = (void *)b; 2081 B->ops[0] = MatOps_Values; 2082 2083 B->ops->destroy = MatDestroy_MPISBAIJ; 2084 B->ops->view = MatView_MPISBAIJ; 2085 B->assembled = PETSC_FALSE; 2086 B->insertmode = NOT_SET_VALUES; 2087 2088 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2089 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2090 2091 /* build local table of row and column ownerships */ 2092 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2093 2094 /* build cache for off array entries formed */ 2095 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2096 2097 b->donotstash = PETSC_FALSE; 2098 b->colmap = NULL; 2099 b->garray = NULL; 2100 b->roworiented = PETSC_TRUE; 2101 2102 /* stuff used in block assembly */ 2103 b->barray = NULL; 2104 2105 /* stuff used for matrix vector multiply */ 2106 b->lvec = NULL; 2107 b->Mvctx = NULL; 2108 b->slvec0 = NULL; 2109 b->slvec0b = NULL; 2110 b->slvec1 = NULL; 2111 b->slvec1a = NULL; 2112 b->slvec1b = NULL; 2113 b->sMvctx = NULL; 2114 2115 /* stuff for MatGetRow() */ 2116 b->rowindices = NULL; 2117 b->rowvalues = NULL; 2118 b->getrowactive = PETSC_FALSE; 2119 2120 /* hash table stuff */ 2121 b->ht = NULL; 2122 b->hd = NULL; 2123 b->ht_size = 0; 2124 b->ht_flag = PETSC_FALSE; 2125 b->ht_fact = 0; 2126 b->ht_total_ct = 0; 2127 b->ht_insert_ct = 0; 2128 2129 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2130 b->ijonly = PETSC_FALSE; 2131 2132 b->in_loc = NULL; 2133 b->v_loc = NULL; 2134 b->n_loc = 0; 2135 2136 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2137 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2138 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2139 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2140 #if defined(PETSC_HAVE_ELEMENTAL) 2141 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2142 #endif 2143 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 2144 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2145 #endif 2146 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2147 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2148 2149 B->symmetric = PETSC_BOOL3_TRUE; 2150 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2151 B->symmetry_eternal = PETSC_TRUE; 2152 B->structural_symmetry_eternal = PETSC_TRUE; 2153 #if !defined(PETSC_USE_COMPLEX) 2154 B->hermitian = PETSC_BOOL3_TRUE; 2155 #endif 2156 2157 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2158 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2159 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2160 if (flg) { 2161 PetscReal fact = 1.39; 2162 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2163 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2164 if (fact <= 1.0) fact = 1.39; 2165 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2166 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2167 } 2168 PetscOptionsEnd(); 2169 PetscFunctionReturn(PETSC_SUCCESS); 2170 } 2171 2172 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2173 /*MC 2174 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2175 2176 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2177 and `MATMPISBAIJ` otherwise. 2178 2179 Options Database Key: 2180 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2181 2182 Level: beginner 2183 2184 .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2185 M*/ 2186 2187 /*@ 2188 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2189 the user should preallocate the matrix storage by setting the parameters 2190 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2191 performance can be increased by more than a factor of 50. 2192 2193 Collective 2194 2195 Input Parameters: 2196 + B - the matrix 2197 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2198 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2199 . d_nz - number of block nonzeros per block row in diagonal portion of local 2200 submatrix (same for all local rows) 2201 . 
d_nnz - array containing the number of block nonzeros in the various block rows 2202 in the upper triangular and diagonal part of the diagonal portion of the local submatrix 2203 (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room 2204 for the diagonal entry and set a value even if it is zero. 2205 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2206 submatrix (same for all local rows). 2207 - o_nnz - array containing the number of nonzeros in the various block rows of the 2208 off-diagonal portion of the local submatrix that is right of the diagonal 2209 (possibly different for each block row) or `NULL`. 2210 2211 Options Database Keys: 2212 + -mat_no_unroll - uses code that does not unroll the loops in the 2213 block calculations (much slower) 2214 - -mat_block_size - size of the blocks to use 2215 2216 Level: intermediate 2217 2218 Notes: 2219 2220 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2221 then it must be used on all processors that share the object for that argument. 2222 2223 If the *_nnz parameter is given then the *_nz parameter is ignored 2224 2225 Storage Information: 2226 For a square global matrix we define each processor's diagonal portion 2227 to be its local rows and the corresponding columns (a square submatrix); 2228 each processor's off-diagonal portion encompasses the remainder of the 2229 local matrix (a rectangular submatrix). 2230 2231 The user can specify preallocated storage for the diagonal part of 2232 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2233 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2234 memory allocation. Likewise, specify preallocated storage for the 2235 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2236 2237 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2238 for example the fields mallocs, nz_allocated, nz_used, nz_unneeded; 2239 You can also run with the option `-info` and look for messages with the string 2240 malloc in them to see if additional memory allocation was needed. 2241 2242 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2243 the figure below we depict these three local rows and all columns (0-11). 2244 2245 .vb 2246 0 1 2 3 4 5 6 7 8 9 10 11 2247 -------------------------- 2248 row 3 |. . . d d d o o o o o o 2249 row 4 |. . . d d d o o o o o o 2250 row 5 |. . . d d d o o o o o o 2251 -------------------------- 2252 .ve 2253 2254 Thus, any entries in the d locations are stored in the d (diagonal) 2255 submatrix, and any entries in the o locations are stored in the 2256 o (off-diagonal) submatrix. Note that the d matrix is stored in 2257 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2258 2259 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2260 plus the diagonal part of the d matrix, 2261 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2262 2263 In general, for PDE problems in which most nonzeros are near the diagonal, 2264 one expects `d_nz` >> `o_nz`.
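   For illustration only (a minimal sketch, not taken from this file; `comm`, the local size `m`, and the nonzero counts are assumed placeholders), a typical call sequence might look like
.vb
   Mat A;
   PetscCall(MatCreate(comm, &A));
   PetscCall(MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE)); /* m must be a multiple of the block size */
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocation(A, 2, 3, NULL, 1, NULL)); /* bs = 2; roughly 3 upper-triangular block nonzeros per block row in the d part and 1 in the o part */
.ve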
2265 2266 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2267 @*/ 2268 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2269 { 2270 PetscFunctionBegin; 2271 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2272 PetscValidType(B, 1); 2273 PetscValidLogicalCollectiveInt(B, bs, 2); 2274 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2275 PetscFunctionReturn(PETSC_SUCCESS); 2276 } 2277 2278 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2279 /*@ 2280 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2281 (block compressed row). For good matrix assembly performance 2282 the user should preallocate the matrix storage by setting the parameters 2283 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 2284 2285 Collective 2286 2287 Input Parameters: 2288 + comm - MPI communicator 2289 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2290 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2291 . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given) 2292 This value should be the same as the local size used in creating the 2293 y vector for the matrix-vector product y = Ax. 2294 . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given) 2295 This value should be the same as the local size used in creating the 2296 x vector for the matrix-vector product y = Ax. 2297 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2298 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2299 . d_nz - number of block nonzeros per block row in diagonal portion of local 2300 submatrix (same for all local rows) 2301 . d_nnz - array containing the number of block nonzeros in the various block rows 2302 in the upper triangular portion of the diagonal portion of the local submatrix 2303 (possibly different for each block row) or `NULL`. 2304 If you plan to factor the matrix you must leave room for the diagonal entry and 2305 set its value even if it is zero. 2306 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2307 submatrix (same for all local rows). 2308 - o_nnz - array containing the number of nonzeros in the various block rows of the 2309 off-diagonal portion of the local submatrix (possibly different for 2310 each block row) or `NULL`. 2311 2312 Output Parameter: 2313 . A - the matrix 2314 2315 Options Database Keys: 2316 + -mat_no_unroll - uses code that does not unroll the loops in the 2317 block calculations (much slower) 2318 . -mat_block_size - size of the blocks to use 2319 - -mat_mpi - use the parallel matrix data structures even on one processor 2320 (defaults to using SeqBAIJ format on one processor) 2321 2322 Level: intermediate 2323 2324 Notes: 2325 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2326 MatXXXXSetPreallocation() paradigm instead of this routine directly.
2327 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 2328 2329 The number of rows and columns must be divisible by blocksize. 2330 This matrix type does not support complex Hermitian operations. 2331 2332 The user MUST specify either the local or global matrix dimensions 2333 (possibly both). 2334 2335 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2336 then it must be used on all processors that share the object for that argument. 2337 2338 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 2339 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 2340 2341 If the *_nnz parameter is given then the *_nz parameter is ignored 2342 2343 Storage Information: 2344 For a square global matrix we define each processor's diagonal portion 2345 to be its local rows and the corresponding columns (a square submatrix); 2346 each processor's off-diagonal portion encompasses the remainder of the 2347 local matrix (a rectangular submatrix). 2348 2349 The user can specify preallocated storage for the diagonal part of 2350 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2351 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2352 memory allocation. Likewise, specify preallocated storage for the 2353 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2354 2355 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2356 the figure below we depict these three local rows and all columns (0-11). 2357 2358 .vb 2359 0 1 2 3 4 5 6 7 8 9 10 11 2360 -------------------------- 2361 row 3 |. . . d d d o o o o o o 2362 row 4 |. . . d d d o o o o o o 2363 row 5 |. . . d d d o o o o o o 2364 -------------------------- 2365 .ve 2366 2367 Thus, any entries in the d locations are stored in the d (diagonal) 2368 submatrix, and any entries in the o locations are stored in the 2369 o (off-diagonal) submatrix. Note that the d matrix is stored in 2370 `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format. 2371 2372 Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular 2373 plus the diagonal part of the d matrix, 2374 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 2375 In general, for PDE problems in which most nonzeros are near the diagonal, 2376 one expects `d_nz` >> `o_nz`.
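   For illustration only (hypothetical sizes, not taken from this file), a direct call creating a 100 x 100 matrix with block size 2, default row ownership, and the same rough nonzero counts as above might look like
.vb
   Mat A;
   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 3, NULL, 1, NULL, &A));
.ve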
2377 2378 .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, 2379 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 2380 @*/ 2381 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2382 { 2383 PetscMPIInt size; 2384 2385 PetscFunctionBegin; 2386 PetscCall(MatCreate(comm, A)); 2387 PetscCall(MatSetSizes(*A, m, n, M, N)); 2388 PetscCallMPI(MPI_Comm_size(comm, &size)); 2389 if (size > 1) { 2390 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2391 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2392 } else { 2393 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2394 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2395 } 2396 PetscFunctionReturn(PETSC_SUCCESS); 2397 } 2398 2399 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2400 { 2401 Mat mat; 2402 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2403 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2404 PetscScalar *array; 2405 2406 PetscFunctionBegin; 2407 *newmat = NULL; 2408 2409 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2410 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2411 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2412 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2413 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2414 2415 if (matin->hash_active) { 2416 PetscCall(MatSetUp(mat)); 2417 } else { 2418 mat->factortype = matin->factortype; 2419 mat->preallocated = PETSC_TRUE; 2420 mat->assembled = PETSC_TRUE; 2421 mat->insertmode = NOT_SET_VALUES; 2422 2423 a = (Mat_MPISBAIJ *)mat->data; 2424 a->bs2 = oldmat->bs2; 2425 a->mbs = oldmat->mbs; 2426 a->nbs = oldmat->nbs; 2427 a->Mbs = oldmat->Mbs; 2428 a->Nbs = oldmat->Nbs; 2429 2430 a->size = oldmat->size; 2431 a->rank = oldmat->rank; 2432 a->donotstash = oldmat->donotstash; 2433 a->roworiented = oldmat->roworiented; 2434 a->rowindices = NULL; 2435 a->rowvalues = NULL; 2436 a->getrowactive = PETSC_FALSE; 2437 a->barray = NULL; 2438 a->rstartbs = oldmat->rstartbs; 2439 a->rendbs = oldmat->rendbs; 2440 a->cstartbs = oldmat->cstartbs; 2441 a->cendbs = oldmat->cendbs; 2442 2443 /* hash table stuff */ 2444 a->ht = NULL; 2445 a->hd = NULL; 2446 a->ht_size = 0; 2447 a->ht_flag = oldmat->ht_flag; 2448 a->ht_fact = oldmat->ht_fact; 2449 a->ht_total_ct = 0; 2450 a->ht_insert_ct = 0; 2451 2452 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2453 if (oldmat->colmap) { 2454 #if defined(PETSC_USE_CTABLE) 2455 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2456 #else 2457 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2458 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2459 #endif 2460 } else a->colmap = NULL; 2461 2462 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 2463 PetscCall(PetscMalloc1(len, &a->garray)); 2464 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2465 } else a->garray = NULL; 2466 2467 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2468 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2469 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2470 2471 
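    /* duplicate the slvec0/slvec1 work vectors and rebuild slvec0b, slvec1a, slvec1b as sub-array views of them: the first bs*mbs entries hold the owned part, the remaining entries the ghost part */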
PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2472 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2473 2474 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2475 PetscCall(VecGetArray(a->slvec1, &array)); 2476 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2477 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2478 PetscCall(VecRestoreArray(a->slvec1, &array)); 2479 PetscCall(VecGetArray(a->slvec0, &array)); 2480 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2481 PetscCall(VecRestoreArray(a->slvec0, &array)); 2482 2483 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2484 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2485 a->sMvctx = oldmat->sMvctx; 2486 2487 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2488 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2489 } 2490 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2491 *newmat = mat; 2492 PetscFunctionReturn(PETSC_SUCCESS); 2493 } 2494 2495 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2496 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2497 2498 static PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2499 { 2500 PetscBool isbinary; 2501 2502 PetscFunctionBegin; 2503 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2504 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2505 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2506 PetscFunctionReturn(PETSC_SUCCESS); 2507 } 2508 2509 static PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2510 { 2511 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2512 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)a->B->data; 2513 PetscReal atmp; 2514 PetscReal *work, *svalues, *rvalues; 2515 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2516 PetscMPIInt rank, size; 2517 PetscInt *rowners_bs, count, source; 2518 PetscScalar *va; 2519 MatScalar *ba; 2520 MPI_Status stat; 2521 2522 PetscFunctionBegin; 2523 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2524 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2525 PetscCall(VecGetArray(v, &va)); 2526 2527 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2528 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2529 2530 bs = A->rmap->bs; 2531 mbs = a->mbs; 2532 Mbs = a->Mbs; 2533 ba = b->a; 2534 bi = b->i; 2535 bj = b->j; 2536 2537 /* find ownerships */ 2538 rowners_bs = A->rmap->range; 2539 2540 /* each proc creates an array to be distributed */ 2541 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2542 2543 /* row_max for B */ 2544 if (rank != size - 1) { 2545 for (i = 0; i < mbs; i++) { 2546 ncols = bi[1] - bi[0]; 2547 bi++; 2548 brow = bs * i; 2549 for (j = 0; j < ncols; j++) { 2550 bcol = bs * (*bj); 2551 for (kcol = 0; kcol < bs; kcol++) { 2552 col = bcol + kcol; /* local col index */ 2553 col += rowners_bs[rank + 1]; /* global col index */ 2554 for (krow = 0; krow < bs; krow++) { 2555 atmp = PetscAbsScalar(*ba); 2556 ba++; 2557 row = brow + krow; /* local row index */ 2558 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2559 if (work[col] < atmp) work[col] = atmp; 2560 } 
2561 } 2562 bj++; 2563 } 2564 } 2565 2566 /* send values to its owners */ 2567 for (PetscMPIInt dest = rank + 1; dest < size; dest++) { 2568 svalues = work + rowners_bs[dest]; 2569 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2570 PetscCallMPI(MPIU_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2571 } 2572 } 2573 2574 /* receive values */ 2575 if (rank) { 2576 rvalues = work; 2577 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2578 for (source = 0; source < rank; source++) { 2579 PetscCallMPI(MPIU_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2580 /* process values */ 2581 for (i = 0; i < count; i++) { 2582 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2583 } 2584 } 2585 } 2586 2587 PetscCall(VecRestoreArray(v, &va)); 2588 PetscCall(PetscFree(work)); 2589 PetscFunctionReturn(PETSC_SUCCESS); 2590 } 2591 2592 static PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2593 { 2594 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2595 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2596 PetscScalar *x, *ptr, *from; 2597 Vec bb1; 2598 const PetscScalar *b; 2599 2600 PetscFunctionBegin; 2601 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2602 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2603 2604 if (flag == SOR_APPLY_UPPER) { 2605 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2606 PetscFunctionReturn(PETSC_SUCCESS); 2607 } 2608 2609 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2610 if (flag & SOR_ZERO_INITIAL_GUESS) { 2611 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2612 its--; 2613 } 2614 2615 PetscCall(VecDuplicate(bb, &bb1)); 2616 while (its--) { 2617 /* lower triangular part: slvec0b = - B^T*xx */ 2618 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2619 2620 /* copy xx into slvec0a */ 2621 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2622 PetscCall(VecGetArray(xx, &x)); 2623 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2624 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2625 2626 PetscCall(VecScale(mat->slvec0, -1.0)); 2627 2628 /* copy bb into slvec1a */ 2629 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2630 PetscCall(VecGetArrayRead(bb, &b)); 2631 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2632 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2633 2634 /* set slvec1b = 0 */ 2635 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2636 PetscCall(VecZeroEntries(mat->slvec1b)); 2637 2638 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2639 PetscCall(VecRestoreArray(xx, &x)); 2640 PetscCall(VecRestoreArrayRead(bb, &b)); 2641 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2642 2643 /* upper triangular part: bb1 = bb1 - B*x */ 2644 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2645 2646 /* local diagonal sweep */ 2647 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2648 } 2649 PetscCall(VecDestroy(&bb1)); 2650 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2651 
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2652 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2653 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2654 } else if (flag & SOR_EISENSTAT) { 2655 Vec xx1; 2656 PetscBool hasop; 2657 const PetscScalar *diag; 2658 PetscScalar *sl, scale = (omega - 2.0) / omega; 2659 PetscInt i, n; 2660 2661 if (!mat->xx1) { 2662 PetscCall(VecDuplicate(bb, &mat->xx1)); 2663 PetscCall(VecDuplicate(bb, &mat->bb1)); 2664 } 2665 xx1 = mat->xx1; 2666 bb1 = mat->bb1; 2667 2668 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2669 2670 if (!mat->diag) { 2671 /* this is wrong for same matrix with new nonzero values */ 2672 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2673 PetscCall(MatGetDiagonal(matin, mat->diag)); 2674 } 2675 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2676 2677 if (hasop) { 2678 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2679 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2680 } else { 2681 /* 2682 These two lines are replaced by code that may be a bit faster for a good compiler 2683 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2684 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2685 */ 2686 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2687 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2688 PetscCall(VecGetArrayRead(bb, &b)); 2689 PetscCall(VecGetArray(xx, &x)); 2690 PetscCall(VecGetLocalSize(xx, &n)); 2691 if (omega == 1.0) { 2692 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2693 PetscCall(PetscLogFlops(2.0 * n)); 2694 } else { 2695 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2696 PetscCall(PetscLogFlops(3.0 * n)); 2697 } 2698 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2699 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2700 PetscCall(VecRestoreArrayRead(bb, &b)); 2701 PetscCall(VecRestoreArray(xx, &x)); 2702 } 2703 2704 /* multiply off-diagonal portion of matrix */ 2705 PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b)); 2706 PetscCall(VecZeroEntries(mat->slvec1b)); 2707 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2708 PetscCall(VecGetArray(mat->slvec0, &from)); 2709 PetscCall(VecGetArray(xx, &x)); 2710 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2711 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2712 PetscCall(VecRestoreArray(xx, &x)); 2713 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2714 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2715 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2716 2717 /* local sweep */ 2718 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2719 PetscCall(VecAXPY(xx, 1.0, xx1)); 2720 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 /*@ 2725 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard CSR format for the local rows. 2726 2727 Collective 2728 2729 Input Parameters: 2730 + comm - MPI communicator 2731 . bs - the block size, only a block size of 1 is supported 2732 . 
m - number of local rows (Cannot be `PETSC_DECIDE`) 2733 . n - This value should be the same as the local size used in creating the 2734 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 2735 calculated if `N` is given) For square matrices `n` is almost always `m`. 2736 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 2737 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 2738 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix 2739 . j - column indices 2740 - a - matrix values 2741 2742 Output Parameter: 2743 . mat - the matrix 2744 2745 Level: intermediate 2746 2747 Notes: 2748 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 2749 thus you CANNOT change the matrix entries by changing the values of `a` after you have 2750 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 2751 2752 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 2753 2754 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 2755 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatMPISBAIJSetPreallocationCSR()` 2756 @*/ 2757 PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 2758 { 2759 PetscFunctionBegin; 2760 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 2761 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 2762 PetscCall(MatCreate(comm, mat)); 2763 PetscCall(MatSetSizes(*mat, m, n, M, N)); 2764 PetscCall(MatSetType(*mat, MATMPISBAIJ)); 2765 PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 2766 PetscFunctionReturn(PETSC_SUCCESS); 2767 } 2768 2769 /*@ 2770 MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values 2771 2772 Collective 2773 2774 Input Parameters: 2775 + B - the matrix 2776 . bs - the block size 2777 . i - the indices into `j` for the start of each local row (indices start with zero) 2778 . j - the column indices for each local row (indices start with zero) these must be sorted for each row 2779 - v - optional values in the matrix, pass `NULL` if not provided 2780 2781 Level: advanced 2782 2783 Notes: 2784 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2785 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2786 called this routine. 
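   As an illustrative sketch (hypothetical data, not taken from this file): with `bs` = 1 and a process owning global rows 0 and 1, where row 0 has entries in columns 0 and 2 and row 1 has an entry in column 1 (only the upper triangle is supplied), one would pass
.vb
   i[] = {0, 2, 3};   j[] = {0, 2, 1};   v[] = {a00, a02, a11};
.ve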
2787 2788 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries 2789 and usually the numerical values as well 2790 2791 Any entries passed in that are below the diagonal are ignored 2792 2793 .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 2794 `MatCreateMPISBAIJWithArrays()` 2795 @*/ 2796 PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2797 { 2798 PetscFunctionBegin; 2799 PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2800 PetscFunctionReturn(PETSC_SUCCESS); 2801 } 2802 2803 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 2804 { 2805 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 2806 PetscInt *indx; 2807 PetscScalar *values; 2808 2809 PetscFunctionBegin; 2810 PetscCall(MatGetSize(inmat, &m, &N)); 2811 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 2812 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data; 2813 PetscInt *dnz, *onz, mbs, Nbs, nbs; 2814 PetscInt *bindx, rmax = a->rmax, j; 2815 PetscMPIInt rank, size; 2816 2817 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2818 mbs = m / bs; 2819 Nbs = N / cbs; 2820 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 2821 nbs = n / cbs; 2822 2823 PetscCall(PetscMalloc1(rmax, &bindx)); 2824 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 2825 2826 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 2827 PetscCallMPI(MPI_Comm_size(comm, &size)); 2828 if (rank == size - 1) { 2829 /* Check sum(nbs) = Nbs */ 2830 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 2831 } 2832 2833 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 2834 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2835 for (i = 0; i < mbs; i++) { 2836 PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 2837 nnz = nnz / bs; 2838 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 2839 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 2840 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 2841 } 2842 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2843 PetscCall(PetscFree(bindx)); 2844 2845 PetscCall(MatCreate(comm, outmat)); 2846 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 2847 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 2848 PetscCall(MatSetType(*outmat, MATSBAIJ)); 2849 PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz)); 2850 PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 2851 MatPreallocateEnd(dnz, onz); 2852 } 2853 2854 /* numeric phase */ 2855 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 2856 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 2857 2858 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 2859 for (i = 0; i < m; i++) { 2860 PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2861 Ii = i + rstart; 2862 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2863 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2864 } 2865 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2866 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2867 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2868 PetscFunctionReturn(PETSC_SUCCESS); 2869 } 2870