1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscblaslapack.h> 5 6 #if defined(PETSC_HAVE_ELEMENTAL) 7 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 8 #endif 9 #if defined(PETSC_HAVE_SCALAPACK) 10 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 11 #endif 12 13 /* This could be moved to matimpl.h */ 14 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill) 15 { 16 Mat preallocator; 17 PetscInt r, rstart, rend; 18 PetscInt bs, i, m, n, M, N; 19 PetscBool cong = PETSC_TRUE; 20 21 PetscFunctionBegin; 22 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 23 PetscValidLogicalCollectiveInt(B, nm, 2); 24 for (i = 0; i < nm; i++) { 25 PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3); 26 PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong)); 27 PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts"); 28 } 29 PetscValidLogicalCollectiveBool(B, fill, 5); 30 PetscCall(MatGetBlockSize(B, &bs)); 31 PetscCall(MatGetSize(B, &M, &N)); 32 PetscCall(MatGetLocalSize(B, &m, &n)); 33 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator)); 34 PetscCall(MatSetType(preallocator, MATPREALLOCATOR)); 35 PetscCall(MatSetBlockSize(preallocator, bs)); 36 PetscCall(MatSetSizes(preallocator, m, n, M, N)); 37 PetscCall(MatSetUp(preallocator)); 38 PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend)); 39 for (r = rstart; r < rend; ++r) { 40 PetscInt ncols; 41 const PetscInt *row; 42 const PetscScalar *vals; 43 44 for (i = 0; i < nm; i++) { 45 PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals)); 46 PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES)); 47 if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES)); 48 PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals)); 49 } 50 } 51 PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY)); 52 PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY)); 53 PetscCall(MatPreallocatorPreallocate(preallocator, fill, B)); 54 PetscCall(MatDestroy(&preallocator)); 55 PetscFunctionReturn(PETSC_SUCCESS); 56 } 57 58 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 59 { 60 Mat B; 61 PetscInt r; 62 63 PetscFunctionBegin; 64 if (reuse != MAT_REUSE_MATRIX) { 65 PetscBool symm = PETSC_TRUE, isdense; 66 PetscInt bs; 67 68 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 69 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 70 PetscCall(MatSetType(B, newtype)); 71 PetscCall(MatGetBlockSize(A, &bs)); 72 PetscCall(MatSetBlockSize(B, bs)); 73 PetscCall(PetscLayoutSetUp(B->rmap)); 74 PetscCall(PetscLayoutSetUp(B->cmap)); 75 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, "")); 76 if (!isdense) { 77 PetscCall(MatGetRowUpperTriangular(A)); 78 PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE)); 79 PetscCall(MatRestoreRowUpperTriangular(A)); 80 } else { 81 PetscCall(MatSetUp(B)); 82 } 83 } else { 84 B = *newmat; 85 PetscCall(MatZeroEntries(B)); 86 } 87 88 PetscCall(MatGetRowUpperTriangular(A)); 89 for (r = A->rmap->rstart; r < A->rmap->rend; r++) { 90 PetscInt ncols; 91 const PetscInt *row; 92 const PetscScalar *vals; 93 94 
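      /* Editor's note (descriptive comment, not original): each locally owned row of the SBAIJ matrix
         stores only the upper triangle; the loop below copies the row as-is and also mirrors it into
         the corresponding column of B, conjugating the mirrored entries when A is marked Hermitian,
         so that B receives the full (symmetric/Hermitian) matrix. */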
PetscCall(MatGetRow(A, r, &ncols, &row, &vals)); 95 PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES)); 96 #if defined(PETSC_USE_COMPLEX) 97 if (A->hermitian == PETSC_BOOL3_TRUE) { 98 PetscInt i; 99 for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES)); 100 } else { 101 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 102 } 103 #else 104 PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES)); 105 #endif 106 PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals)); 107 } 108 PetscCall(MatRestoreRowUpperTriangular(A)); 109 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 110 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 111 112 if (reuse == MAT_INPLACE_MATRIX) { 113 PetscCall(MatHeaderReplace(A, &B)); 114 } else { 115 *newmat = B; 116 } 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat) 121 { 122 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 123 124 PetscFunctionBegin; 125 PetscCall(MatStoreValues(aij->A)); 126 PetscCall(MatStoreValues(aij->B)); 127 PetscFunctionReturn(PETSC_SUCCESS); 128 } 129 130 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat) 131 { 132 Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data; 133 134 PetscFunctionBegin; 135 PetscCall(MatRetrieveValues(aij->A)); 136 PetscCall(MatRetrieveValues(aij->B)); 137 PetscFunctionReturn(PETSC_SUCCESS); 138 } 139 140 #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 141 { \ 142 brow = row / bs; \ 143 rp = aj + ai[brow]; \ 144 ap = aa + bs2 * ai[brow]; \ 145 rmax = aimax[brow]; \ 146 nrow = ailen[brow]; \ 147 bcol = col / bs; \ 148 ridx = row % bs; \ 149 cidx = col % bs; \ 150 low = 0; \ 151 high = nrow; \ 152 while (high - low > 3) { \ 153 t = (low + high) / 2; \ 154 if (rp[t] > bcol) high = t; \ 155 else low = t; \ 156 } \ 157 for (_i = low; _i < high; _i++) { \ 158 if (rp[_i] > bcol) break; \ 159 if (rp[_i] == bcol) { \ 160 bap = ap + bs2 * _i + bs * cidx + ridx; \ 161 if (addv == ADD_VALUES) *bap += value; \ 162 else *bap = value; \ 163 goto a_noinsert; \ 164 } \ 165 } \ 166 if (a->nonew == 1) goto a_noinsert; \ 167 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 168 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 169 N = nrow++ - 1; \ 170 /* shift up all the later entries in this row */ \ 171 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 172 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 173 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 174 rp[_i] = bcol; \ 175 ap[bs2 * _i + bs * cidx + ridx] = value; \ 176 A->nonzerostate++; \ 177 a_noinsert:; \ 178 ailen[brow] = nrow; \ 179 } 180 181 #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 182 { \ 183 brow = row / bs; \ 184 rp = bj + bi[brow]; \ 185 ap = ba + bs2 * bi[brow]; \ 186 rmax = bimax[brow]; \ 187 nrow = bilen[brow]; \ 188 bcol = col / bs; \ 189 ridx = row % bs; \ 190 cidx = col % bs; \ 191 low = 0; \ 192 high = nrow; \ 193 while (high - low > 3) { \ 194 t = (low + high) / 2; \ 195 if (rp[t] > bcol) high = t; \ 196 else low = t; \ 197 } \ 198 for (_i = low; _i < high; _i++) { \ 199 if (rp[_i] > bcol) break; \ 200 if (rp[_i] == bcol) { \ 201 bap = ap + bs2 * _i + bs * cidx + ridx; \ 202 if (addv == ADD_VALUES) *bap += value; \ 203 
else *bap = value; \ 204 goto b_noinsert; \ 205 } \ 206 } \ 207 if (b->nonew == 1) goto b_noinsert; \ 208 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 209 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 210 N = nrow++ - 1; \ 211 /* shift up all the later entries in this row */ \ 212 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 213 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 214 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 215 rp[_i] = bcol; \ 216 ap[bs2 * _i + bs * cidx + ridx] = value; \ 217 B->nonzerostate++; \ 218 b_noinsert:; \ 219 bilen[brow] = nrow; \ 220 } 221 222 /* Only add/insert a(i,j) with i<=j (blocks). 223 Any a(i,j) with i>j input by user is ignored or generates an error 224 */ 225 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 226 { 227 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 228 MatScalar value; 229 PetscBool roworiented = baij->roworiented; 230 PetscInt i, j, row, col; 231 PetscInt rstart_orig = mat->rmap->rstart; 232 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 233 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 234 235 /* Some Variables required in the macro */ 236 Mat A = baij->A; 237 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data; 238 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 239 MatScalar *aa = a->a; 240 241 Mat B = baij->B; 242 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 243 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 244 MatScalar *ba = b->a; 245 246 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 247 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 248 MatScalar *ap, *bap; 249 250 /* for stash */ 251 PetscInt n_loc, *in_loc = NULL; 252 MatScalar *v_loc = NULL; 253 254 PetscFunctionBegin; 255 if (!baij->donotstash) { 256 if (n > baij->n_loc) { 257 PetscCall(PetscFree(baij->in_loc)); 258 PetscCall(PetscFree(baij->v_loc)); 259 PetscCall(PetscMalloc1(n, &baij->in_loc)); 260 PetscCall(PetscMalloc1(n, &baij->v_loc)); 261 262 baij->n_loc = n; 263 } 264 in_loc = baij->in_loc; 265 v_loc = baij->v_loc; 266 } 267 268 for (i = 0; i < m; i++) { 269 if (im[i] < 0) continue; 270 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 271 if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 272 row = im[i] - rstart_orig; /* local row index */ 273 for (j = 0; j < n; j++) { 274 if (im[i] / bs > in[j] / bs) { 275 if (a->ignore_ltriangular) { 276 continue; /* ignore lower triangular blocks */ 277 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 278 } 279 if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */ 280 col = in[j] - cstart_orig; /* local col index */ 281 brow = row / bs; 282 bcol = col / bs; 283 if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 284 if (roworiented) value = v[i * n + j]; 285 else value = v[i + j * m]; 286 MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 287 /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */ 288 } else if (in[j] < 0) { 289 continue; 290 } else { 291 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 292 /* off-diag entry (B) */ 293 if (mat->was_assembled) { 294 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 295 #if defined(PETSC_USE_CTABLE) 296 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 297 col = col - 1; 298 #else 299 col = baij->colmap[in[j] / bs] - 1; 300 #endif 301 if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) { 302 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 303 col = in[j]; 304 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 305 B = baij->B; 306 b = (Mat_SeqBAIJ *)(B)->data; 307 bimax = b->imax; 308 bi = b->i; 309 bilen = b->ilen; 310 bj = b->j; 311 ba = b->a; 312 } else col += in[j] % bs; 313 } else col = in[j]; 314 if (roworiented) value = v[i * n + j]; 315 else value = v[i + j * m]; 316 MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 317 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 318 } 319 } 320 } else { /* off processor entry */ 321 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 322 if (!baij->donotstash) { 323 mat->assembled = PETSC_FALSE; 324 n_loc = 0; 325 for (j = 0; j < n; j++) { 326 if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */ 327 in_loc[n_loc] = in[j]; 328 if (roworiented) { 329 v_loc[n_loc] = v[i * n + j]; 330 } else { 331 v_loc[n_loc] = v[j * m + i]; 332 } 333 n_loc++; 334 } 335 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE)); 336 } 337 } 338 } 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 343 { 344 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data; 345 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 346 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 347 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 348 PetscBool roworiented = a->roworiented; 349 const PetscScalar *value = v; 350 MatScalar *ap, *aa = a->a, *bap; 351 352 PetscFunctionBegin; 353 if (col < row) { 354 if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */ 355 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 356 } 357 rp = aj + ai[row]; 358 ap = aa + bs2 * ai[row]; 359 rmax = imax[row]; 360 nrow = ailen[row]; 361 value = v; 362 low = 0; 363 high = nrow; 364 365 while (high - low > 7) { 366 t = (low + high) / 2; 367 if (rp[t] > col) high = t; 368 else low = t; 369 } 370 for (i = low; i < high; i++) { 371 if (rp[i] > col) break; 372 if (rp[i] == col) { 373 bap = ap + bs2 * i; 374 if (roworiented) { 375 if (is == ADD_VALUES) { 376 for (ii = 0; ii < bs; ii++) { 377 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 378 } 379 } else { 380 for (ii = 0; ii < bs; ii++) { 381 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 382 } 383 } 384 } else { 385 if (is == ADD_VALUES) { 386 for (ii = 0; ii < bs; ii++) { 387 for (jj = 0; jj < bs; jj++) *bap++ += *value++; 388 } 389 } else { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 392 } 393 } 394 } 395 goto noinsert2; 396 } 397 } 398 if (nonew == 1) goto noinsert2; 399 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 400 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 401 N = nrow++ - 1; 402 high++; 403 /* shift up all the later entries in this row */ 404 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 405 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 406 rp[i] = col; 407 bap = ap + bs2 * i; 408 if (roworiented) { 409 for (ii = 0; ii < bs; ii++) { 410 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 411 } 412 } else { 413 for (ii = 0; ii < bs; ii++) { 414 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 415 } 416 } 417 noinsert2:; 418 ailen[row] = nrow; 419 PetscFunctionReturn(PETSC_SUCCESS); 420 } 421 422 /* 423 This routine is exactly duplicated in mpibaij.c 424 */ 425 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 426 { 427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 428 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 429 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 430 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 431 PetscBool roworiented = a->roworiented; 432 const PetscScalar *value = v; 433 MatScalar *ap, *aa = a->a, *bap; 434 435 PetscFunctionBegin; 436 rp = aj + ai[row]; 437 ap = aa + bs2 * ai[row]; 438 rmax = imax[row]; 439 nrow = ailen[row]; 440 low = 0; 441 high = nrow; 442 value = v; 443 while (high - low > 7) { 444 t = (low + high) / 2; 445 if (rp[t] > col) high = t; 446 else low = t; 447 } 448 for (i = low; i < high; i++) { 449 if (rp[i] > col) break; 450 if (rp[i] == col) { 451 bap = ap + bs2 * i; 452 if (roworiented) { 453 if (is == ADD_VALUES) { 454 for (ii = 0; ii < bs; ii++) { 455 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 456 } 457 } else { 458 for (ii = 0; ii < bs; ii++) { 459 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 460 } 461 } 462 } else { 463 if (is == ADD_VALUES) { 464 for (ii = 0; ii < bs; ii++, value += bs) { 465 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 466 bap += bs; 467 } 468 } else { 469 for (ii = 0; ii < bs; ii++, value += bs) { 470 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 471 bap += bs; 472 } 473 } 474 } 475 goto noinsert2; 
476 } 477 } 478 if (nonew == 1) goto noinsert2; 479 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 480 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 481 N = nrow++ - 1; 482 high++; 483 /* shift up all the later entries in this row */ 484 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 485 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 486 rp[i] = col; 487 bap = ap + bs2 * i; 488 if (roworiented) { 489 for (ii = 0; ii < bs; ii++) { 490 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 491 } 492 } else { 493 for (ii = 0; ii < bs; ii++) { 494 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 495 } 496 } 497 noinsert2:; 498 ailen[row] = nrow; 499 PetscFunctionReturn(PETSC_SUCCESS); 500 } 501 502 /* 503 This routine could be optimized by removing the need for the block copy below and passing stride information 504 to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ() 505 */ 506 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv) 507 { 508 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 509 const MatScalar *value; 510 MatScalar *barray = baij->barray; 511 PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular; 512 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 513 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 514 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 515 516 PetscFunctionBegin; 517 if (!barray) { 518 PetscCall(PetscMalloc1(bs2, &barray)); 519 baij->barray = barray; 520 } 521 522 if (roworiented) { 523 stepval = (n - 1) * bs; 524 } else { 525 stepval = (m - 1) * bs; 526 } 527 for (i = 0; i < m; i++) { 528 if (im[i] < 0) continue; 529 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 530 if (im[i] >= rstart && im[i] < rend) { 531 row = im[i] - rstart; 532 for (j = 0; j < n; j++) { 533 if (im[i] > in[j]) { 534 if (ignore_ltriangular) continue; /* ignore lower triangular blocks */ 535 else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. 
Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 536 } 537 /* If NumCol = 1 then a copy is not required */ 538 if ((roworiented) && (n == 1)) { 539 barray = (MatScalar *)v + i * bs2; 540 } else if ((!roworiented) && (m == 1)) { 541 barray = (MatScalar *)v + j * bs2; 542 } else { /* Here a copy is required */ 543 if (roworiented) { 544 value = v + i * (stepval + bs) * bs + j * bs; 545 } else { 546 value = v + j * (stepval + bs) * bs + i * bs; 547 } 548 for (ii = 0; ii < bs; ii++, value += stepval) { 549 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 550 } 551 barray -= bs2; 552 } 553 554 if (in[j] >= cstart && in[j] < cend) { 555 col = in[j] - cstart; 556 PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 557 } else if (in[j] < 0) { 558 continue; 559 } else { 560 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 561 if (mat->was_assembled) { 562 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 563 564 #if defined(PETSC_USE_DEBUG) 565 #if defined(PETSC_USE_CTABLE) 566 { 567 PetscInt data; 568 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 569 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 570 } 571 #else 572 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 573 #endif 574 #endif 575 #if defined(PETSC_USE_CTABLE) 576 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 577 col = (col - 1) / bs; 578 #else 579 col = (baij->colmap[in[j]] - 1) / bs; 580 #endif 581 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 582 PetscCall(MatDisAssemble_MPISBAIJ(mat)); 583 col = in[j]; 584 } 585 } else col = in[j]; 586 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 587 } 588 } 589 } else { 590 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 591 if (!baij->donotstash) { 592 if (roworiented) { 593 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 594 } else { 595 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 596 } 597 } 598 } 599 } 600 PetscFunctionReturn(PETSC_SUCCESS); 601 } 602 603 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 604 { 605 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 606 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 607 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 608 609 PetscFunctionBegin; 610 for (i = 0; i < m; i++) { 611 if (idxm[i] < 0) continue; /* negative row */ 612 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 613 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 614 row = idxm[i] - bsrstart; 615 for (j = 0; j < n; j++) { 616 if (idxn[j] < 0) continue; /* negative column */ 617 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], 
mat->cmap->N - 1); 618 if (idxn[j] >= bscstart && idxn[j] < bscend) { 619 col = idxn[j] - bscstart; 620 PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 621 } else { 622 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 623 #if defined(PETSC_USE_CTABLE) 624 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 625 data--; 626 #else 627 data = baij->colmap[idxn[j] / bs] - 1; 628 #endif 629 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 630 else { 631 col = data + idxn[j] % bs; 632 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 633 } 634 } 635 } 636 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 637 } 638 PetscFunctionReturn(PETSC_SUCCESS); 639 } 640 641 PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm) 642 { 643 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 644 PetscReal sum[2], *lnorm2; 645 646 PetscFunctionBegin; 647 if (baij->size == 1) { 648 PetscCall(MatNorm(baij->A, type, norm)); 649 } else { 650 if (type == NORM_FROBENIUS) { 651 PetscCall(PetscMalloc1(2, &lnorm2)); 652 PetscCall(MatNorm(baij->A, type, lnorm2)); 653 *lnorm2 = (*lnorm2) * (*lnorm2); 654 lnorm2++; /* squar power of norm(A) */ 655 PetscCall(MatNorm(baij->B, type, lnorm2)); 656 *lnorm2 = (*lnorm2) * (*lnorm2); 657 lnorm2--; /* squar power of norm(B) */ 658 PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 659 *norm = PetscSqrtReal(sum[0] + 2 * sum[1]); 660 PetscCall(PetscFree(lnorm2)); 661 } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */ 662 Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data; 663 Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data; 664 PetscReal *rsum, *rsum2, vabs; 665 PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz; 666 PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs; 667 MatScalar *v; 668 669 PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2)); 670 PetscCall(PetscArrayzero(rsum, mat->cmap->N)); 671 /* Amat */ 672 v = amat->a; 673 jj = amat->j; 674 for (brow = 0; brow < mbs; brow++) { 675 grow = bs * (rstart + brow); 676 nz = amat->i[brow + 1] - amat->i[brow]; 677 for (bcol = 0; bcol < nz; bcol++) { 678 gcol = bs * (rstart + *jj); 679 jj++; 680 for (col = 0; col < bs; col++) { 681 for (row = 0; row < bs; row++) { 682 vabs = PetscAbsScalar(*v); 683 v++; 684 rsum[gcol + col] += vabs; 685 /* non-diagonal block */ 686 if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs; 687 } 688 } 689 } 690 PetscCall(PetscLogFlops(nz * bs * bs)); 691 } 692 /* Bmat */ 693 v = bmat->a; 694 jj = bmat->j; 695 for (brow = 0; brow < mbs; brow++) { 696 grow = bs * (rstart + brow); 697 nz = bmat->i[brow + 1] - bmat->i[brow]; 698 for (bcol = 0; bcol < nz; bcol++) { 699 gcol = bs * garray[*jj]; 700 jj++; 701 for (col = 0; col < bs; col++) { 702 for (row = 0; row < bs; row++) { 703 vabs = PetscAbsScalar(*v); 704 v++; 705 rsum[gcol + col] += vabs; 706 rsum[grow + row] += vabs; 707 } 708 } 709 } 710 PetscCall(PetscLogFlops(nz * bs * bs)); 711 } 712 PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 713 *norm = 0.0; 714 for (col = 0; col < mat->cmap->N; col++) { 715 if (rsum2[col] > *norm) *norm = rsum2[col]; 716 } 717 PetscCall(PetscFree2(rsum, rsum2)); 718 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet"); 
719 } 720 PetscFunctionReturn(PETSC_SUCCESS); 721 } 722 723 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode) 724 { 725 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 726 PetscInt nstash, reallocs; 727 728 PetscFunctionBegin; 729 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 730 731 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 732 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 733 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 734 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 735 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 736 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 743 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data; 744 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 745 PetscInt *row, *col; 746 PetscBool other_disassembled; 747 PetscMPIInt n; 748 PetscBool r1, r2, r3; 749 MatScalar *val; 750 751 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 752 PetscFunctionBegin; 753 if (!baij->donotstash && !mat->nooffprocentries) { 754 while (1) { 755 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 756 if (!flg) break; 757 758 for (i = 0; i < n;) { 759 /* Now identify the consecutive vals belonging to the same row */ 760 for (j = i, rstart = row[j]; j < n; j++) { 761 if (row[j] != rstart) break; 762 } 763 if (j < n) ncols = j - i; 764 else ncols = n - i; 765 /* Now assemble all these values with a single function call */ 766 PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 767 i = j; 768 } 769 } 770 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 771 /* Now process the block-stash. Since the values are stashed column-oriented, 772 set the roworiented flag to column oriented, and after MatSetValues() 773 restore the original flags */ 774 r1 = baij->roworiented; 775 r2 = a->roworiented; 776 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 777 778 baij->roworiented = PETSC_FALSE; 779 a->roworiented = PETSC_FALSE; 780 781 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */ 782 while (1) { 783 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 784 if (!flg) break; 785 786 for (i = 0; i < n;) { 787 /* Now identify the consecutive vals belonging to the same row */ 788 for (j = i, rstart = row[j]; j < n; j++) { 789 if (row[j] != rstart) break; 790 } 791 if (j < n) ncols = j - i; 792 else ncols = n - i; 793 PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 794 i = j; 795 } 796 } 797 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 798 799 baij->roworiented = r1; 800 a->roworiented = r2; 801 802 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */ 803 } 804 805 PetscCall(MatAssemblyBegin(baij->A, mode)); 806 PetscCall(MatAssemblyEnd(baij->A, mode)); 807 808 /* determine if any processor has disassembled, if so we must 809 also disassemble ourselves, in order that we may reassemble. 
*/ 810 /* 811 if nonzero structure of submatrix B cannot change then we know that 812 no processor disassembled thus we can skip this stuff 813 */ 814 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 815 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 816 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat)); 817 } 818 819 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ } 820 PetscCall(MatAssemblyBegin(baij->B, mode)); 821 PetscCall(MatAssemblyEnd(baij->B, mode)); 822 823 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 824 825 baij->rowvalues = NULL; 826 827 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 828 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 829 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 830 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 831 } 832 PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode); 836 #include <petscdraw.h> 837 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 838 { 839 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 840 PetscInt bs = mat->rmap->bs; 841 PetscMPIInt rank = baij->rank; 842 PetscBool iascii, isdraw; 843 PetscViewer sviewer; 844 PetscViewerFormat format; 845 846 PetscFunctionBegin; 847 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 848 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 849 if (iascii) { 850 PetscCall(PetscViewerGetFormat(viewer, &format)); 851 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 852 MatInfo info; 853 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 854 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 855 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 856 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 857 mat->rmap->bs, (double)info.memory)); 858 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 859 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 860 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 861 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 862 PetscCall(PetscViewerFlush(viewer)); 863 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 864 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 865 PetscCall(VecScatterView(baij->Mvctx, viewer)); 866 PetscFunctionReturn(PETSC_SUCCESS); 867 } else if (format == PETSC_VIEWER_ASCII_INFO) { 868 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 869 PetscFunctionReturn(PETSC_SUCCESS); 870 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 871 PetscFunctionReturn(PETSC_SUCCESS); 872 } 873 } 874 875 if (isdraw) { 876 PetscDraw draw; 877 PetscBool 
isnull; 878 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 879 PetscCall(PetscDrawIsNull(draw, &isnull)); 880 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 { 884 /* assemble the entire matrix onto first processor. */ 885 Mat A; 886 Mat_SeqSBAIJ *Aloc; 887 Mat_SeqBAIJ *Bloc; 888 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 889 MatScalar *a; 890 const char *matname; 891 892 /* Should this be the same type as mat? */ 893 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 894 if (rank == 0) { 895 PetscCall(MatSetSizes(A, M, N, M, N)); 896 } else { 897 PetscCall(MatSetSizes(A, 0, 0, M, N)); 898 } 899 PetscCall(MatSetType(A, MATMPISBAIJ)); 900 PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 901 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 902 903 /* copy over the A part */ 904 Aloc = (Mat_SeqSBAIJ *)baij->A->data; 905 ai = Aloc->i; 906 aj = Aloc->j; 907 a = Aloc->a; 908 PetscCall(PetscMalloc1(bs, &rvals)); 909 910 for (i = 0; i < mbs; i++) { 911 rvals[0] = bs * (baij->rstartbs + i); 912 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 913 for (j = ai[i]; j < ai[i + 1]; j++) { 914 col = (baij->cstartbs + aj[j]) * bs; 915 for (k = 0; k < bs; k++) { 916 PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 917 col++; 918 a += bs; 919 } 920 } 921 } 922 /* copy over the B part */ 923 Bloc = (Mat_SeqBAIJ *)baij->B->data; 924 ai = Bloc->i; 925 aj = Bloc->j; 926 a = Bloc->a; 927 for (i = 0; i < mbs; i++) { 928 rvals[0] = bs * (baij->rstartbs + i); 929 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 930 for (j = ai[i]; j < ai[i + 1]; j++) { 931 col = baij->garray[aj[j]] * bs; 932 for (k = 0; k < bs; k++) { 933 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 934 col++; 935 a += bs; 936 } 937 } 938 } 939 PetscCall(PetscFree(rvals)); 940 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 941 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 942 /* 943 Everyone has to call to draw the matrix since the graphics waits are 944 synchronized across all processors that share the PetscDraw object 945 */ 946 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 947 PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 948 if (rank == 0) { 949 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname)); 950 PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer)); 951 } 952 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 953 PetscCall(PetscViewerFlush(viewer)); 954 PetscCall(MatDestroy(&A)); 955 } 956 PetscFunctionReturn(PETSC_SUCCESS); 957 } 958 959 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 960 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary 961 962 PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer) 963 { 964 PetscBool iascii, isdraw, issocket, isbinary; 965 966 PetscFunctionBegin; 967 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 968 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 969 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 970 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 971 if (iascii || isdraw || issocket) { 972 PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer)); 973 } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer)); 974 
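  /* Viewer types other than ASCII, draw, socket, and binary are silently ignored here (no-op). */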
PetscFunctionReturn(PETSC_SUCCESS); 975 } 976 977 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat) 978 { 979 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 980 981 PetscFunctionBegin; 982 #if defined(PETSC_USE_LOG) 983 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 984 #endif 985 PetscCall(MatStashDestroy_Private(&mat->stash)); 986 PetscCall(MatStashDestroy_Private(&mat->bstash)); 987 PetscCall(MatDestroy(&baij->A)); 988 PetscCall(MatDestroy(&baij->B)); 989 #if defined(PETSC_USE_CTABLE) 990 PetscCall(PetscHMapIDestroy(&baij->colmap)); 991 #else 992 PetscCall(PetscFree(baij->colmap)); 993 #endif 994 PetscCall(PetscFree(baij->garray)); 995 PetscCall(VecDestroy(&baij->lvec)); 996 PetscCall(VecScatterDestroy(&baij->Mvctx)); 997 PetscCall(VecDestroy(&baij->slvec0)); 998 PetscCall(VecDestroy(&baij->slvec0b)); 999 PetscCall(VecDestroy(&baij->slvec1)); 1000 PetscCall(VecDestroy(&baij->slvec1a)); 1001 PetscCall(VecDestroy(&baij->slvec1b)); 1002 PetscCall(VecScatterDestroy(&baij->sMvctx)); 1003 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 1004 PetscCall(PetscFree(baij->barray)); 1005 PetscCall(PetscFree(baij->hd)); 1006 PetscCall(VecDestroy(&baij->diag)); 1007 PetscCall(VecDestroy(&baij->bb1)); 1008 PetscCall(VecDestroy(&baij->xx1)); 1009 #if defined(PETSC_USE_REAL_MAT_SINGLE) 1010 PetscCall(PetscFree(baij->setvaluescopy)); 1011 #endif 1012 PetscCall(PetscFree(baij->in_loc)); 1013 PetscCall(PetscFree(baij->v_loc)); 1014 PetscCall(PetscFree(baij->rangebs)); 1015 PetscCall(PetscFree(mat->data)); 1016 1017 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1018 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 1019 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1020 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL)); 1021 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL)); 1022 #if defined(PETSC_HAVE_ELEMENTAL) 1023 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL)); 1024 #endif 1025 #if defined(PETSC_HAVE_SCALAPACK) 1026 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL)); 1027 #endif 1028 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL)); 1029 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL)); 1030 PetscFunctionReturn(PETSC_SUCCESS); 1031 } 1032 1033 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy) 1034 { 1035 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1036 PetscInt mbs = a->mbs, bs = A->rmap->bs; 1037 PetscScalar *from; 1038 const PetscScalar *x; 1039 1040 PetscFunctionBegin; 1041 /* diagonal part */ 1042 PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a)); 1043 PetscCall(VecSet(a->slvec1b, 0.0)); 1044 1045 /* subdiagonal part */ 1046 PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name); 1047 PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b)); 1048 1049 /* copy x into the vec slvec0 */ 1050 PetscCall(VecGetArray(a->slvec0, &from)); 1051 PetscCall(VecGetArrayRead(xx, &x)); 1052 1053 PetscCall(PetscArraycpy(from, x, bs * mbs)); 1054 PetscCall(VecRestoreArray(a->slvec0, &from)); 1055 PetscCall(VecRestoreArrayRead(xx, &x)); 1056 1057 
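  /* Editor's note (descriptive comment, not original): slvec0 now holds [x_local ; B^H x_local].
     The scatter below adds the B^H x pieces into the owning ranks' slvec1a and gathers the needed
     ghost values of x into slvec1b, so the final multadd y = B*slvec1b + slvec1a completes the
     Hermitian product using only the stored upper-triangular blocks. */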
  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, 0.0));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));

  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));
  PetscCall(VecRestoreArrayRead(xx, &x));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
  PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
  PetscInt           mbs = a->mbs, bs = A->rmap->bs;
  PetscScalar       *from, zero = 0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
  PetscCall(VecSet(a->slvec1b, zero));

  /* subdiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));

  /* copy x into the vec slvec0 */
  PetscCall(VecGetArray(a->slvec0, &from));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(PetscArraycpy(from, x, bs * mbs));
  PetscCall(VecRestoreArray(a->slvec0, &from));

  PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));

  /* superdiagonal part */
  PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
  PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
1194 */ 1195 Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data; 1196 Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data; 1197 PetscInt max = 1, mbs = mat->mbs, tmp; 1198 for (i = 0; i < mbs; i++) { 1199 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */ 1200 if (max < tmp) max = tmp; 1201 } 1202 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1203 } 1204 1205 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1206 lrow = row - brstart; /* local row index */ 1207 1208 pvA = &vworkA; 1209 pcA = &cworkA; 1210 pvB = &vworkB; 1211 pcB = &cworkB; 1212 if (!v) { 1213 pvA = NULL; 1214 pvB = NULL; 1215 } 1216 if (!idx) { 1217 pcA = NULL; 1218 if (!v) pcB = NULL; 1219 } 1220 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1221 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1222 nztot = nzA + nzB; 1223 1224 cmap = mat->garray; 1225 if (v || idx) { 1226 if (nztot) { 1227 /* Sort by increasing column numbers, assuming A and B already sorted */ 1228 PetscInt imark = -1; 1229 if (v) { 1230 *v = v_p = mat->rowvalues; 1231 for (i = 0; i < nzB; i++) { 1232 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1233 else break; 1234 } 1235 imark = i; 1236 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1237 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1238 } 1239 if (idx) { 1240 *idx = idx_p = mat->rowindices; 1241 if (imark > -1) { 1242 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1243 } else { 1244 for (i = 0; i < nzB; i++) { 1245 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1246 else break; 1247 } 1248 imark = i; 1249 } 1250 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1251 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1252 } 1253 } else { 1254 if (idx) *idx = NULL; 1255 if (v) *v = NULL; 1256 } 1257 } 1258 *nz = nztot; 1259 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1260 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1261 PetscFunctionReturn(PETSC_SUCCESS); 1262 } 1263 1264 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1265 { 1266 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1267 1268 PetscFunctionBegin; 1269 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1270 baij->getrowactive = PETSC_FALSE; 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 1274 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A) 1275 { 1276 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1277 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1278 1279 PetscFunctionBegin; 1280 aA->getrow_utriangular = PETSC_TRUE; 1281 PetscFunctionReturn(PETSC_SUCCESS); 1282 } 1283 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A) 1284 { 1285 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1286 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1287 1288 PetscFunctionBegin; 1289 aA->getrow_utriangular = PETSC_FALSE; 1290 PetscFunctionReturn(PETSC_SUCCESS); 1291 } 1292 1293 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat) 1294 { 1295 PetscFunctionBegin; 1296 if (PetscDefined(USE_COMPLEX)) { 1297 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data; 1298 1299 PetscCall(MatConjugate(a->A)); 1300 PetscCall(MatConjugate(a->B)); 1301 } 1302 PetscFunctionReturn(PETSC_SUCCESS); 1303 } 1304 1305 
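/*
   Usage sketch (editor's example, not part of the PETSc implementation): callers assemble an
   MPISBAIJ matrix by setting only upper-triangular blocks (block row <= block column); lower
   triangular input either raises an error or, with MAT_IGNORE_LOWER_TRIANGULAR, is dropped.

     Mat         A;
     PetscInt    bs = 2;
     PetscScalar blk[4] = {4.0, 1.0, 1.0, 4.0};
     PetscInt    ib = 0, jb = 0;                  // block row/column indices, ib <= jb

     MatCreate(PETSC_COMM_WORLD, &A);
     MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 10, 10);
     MatSetType(A, MATMPISBAIJ);
     MatMPISBAIJSetPreallocation(A, bs, 3, NULL, 2, NULL);
     MatSetOption(A, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE);
     MatSetValuesBlocked(A, 1, &ib, 1, &jb, blk, INSERT_VALUES);
     MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
     MatDestroy(&A);
*/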
PetscErrorCode MatRealPart_MPISBAIJ(Mat A) 1306 { 1307 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1308 1309 PetscFunctionBegin; 1310 PetscCall(MatRealPart(a->A)); 1311 PetscCall(MatRealPart(a->B)); 1312 PetscFunctionReturn(PETSC_SUCCESS); 1313 } 1314 1315 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A) 1316 { 1317 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1318 1319 PetscFunctionBegin; 1320 PetscCall(MatImaginaryPart(a->A)); 1321 PetscCall(MatImaginaryPart(a->B)); 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } 1324 1325 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ() 1326 Input: isrow - distributed(parallel), 1327 iscol_local - locally owned (seq) 1328 */ 1329 PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg) 1330 { 1331 PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch; 1332 const PetscInt *ptr1, *ptr2; 1333 1334 PetscFunctionBegin; 1335 PetscCall(ISGetLocalSize(isrow, &sz1)); 1336 PetscCall(ISGetLocalSize(iscol_local, &sz2)); 1337 if (sz1 > sz2) { 1338 *flg = PETSC_FALSE; 1339 PetscFunctionReturn(PETSC_SUCCESS); 1340 } 1341 1342 PetscCall(ISGetIndices(isrow, &ptr1)); 1343 PetscCall(ISGetIndices(iscol_local, &ptr2)); 1344 1345 PetscCall(PetscMalloc1(sz1, &a1)); 1346 PetscCall(PetscMalloc1(sz2, &a2)); 1347 PetscCall(PetscArraycpy(a1, ptr1, sz1)); 1348 PetscCall(PetscArraycpy(a2, ptr2, sz2)); 1349 PetscCall(PetscSortInt(sz1, a1)); 1350 PetscCall(PetscSortInt(sz2, a2)); 1351 1352 nmatch = 0; 1353 k = 0; 1354 for (i = 0; i < sz1; i++) { 1355 for (j = k; j < sz2; j++) { 1356 if (a1[i] == a2[j]) { 1357 k = j; 1358 nmatch++; 1359 break; 1360 } 1361 } 1362 } 1363 PetscCall(ISRestoreIndices(isrow, &ptr1)); 1364 PetscCall(ISRestoreIndices(iscol_local, &ptr2)); 1365 PetscCall(PetscFree(a1)); 1366 PetscCall(PetscFree(a2)); 1367 if (nmatch < sz1) { 1368 *flg = PETSC_FALSE; 1369 } else { 1370 *flg = PETSC_TRUE; 1371 } 1372 PetscFunctionReturn(PETSC_SUCCESS); 1373 } 1374 1375 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1376 { 1377 IS iscol_local; 1378 PetscInt csize; 1379 PetscBool isequal; 1380 1381 PetscFunctionBegin; 1382 PetscCall(ISGetLocalSize(iscol, &csize)); 1383 if (call == MAT_REUSE_MATRIX) { 1384 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1385 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1386 } else { 1387 PetscBool issorted; 1388 1389 PetscCall(ISAllGather(iscol, &iscol_local)); 1390 PetscCall(ISEqual_private(isrow, iscol_local, &isequal)); 1391 PetscCall(ISSorted(iscol_local, &issorted)); 1392 PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted"); 1393 } 1394 1395 /* now call MatCreateSubMatrix_MPIBAIJ() */ 1396 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1397 if (call == MAT_INITIAL_MATRIX) { 1398 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1399 PetscCall(ISDestroy(&iscol_local)); 1400 } 1401 PetscFunctionReturn(PETSC_SUCCESS); 1402 } 1403 1404 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A) 1405 { 1406 Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data; 1407 1408 PetscFunctionBegin; 1409 PetscCall(MatZeroEntries(l->A)); 1410 PetscCall(MatZeroEntries(l->B)); 1411 PetscFunctionReturn(PETSC_SUCCESS); 1412 } 1413 1414 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo 
*info) 1415 { 1416 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data; 1417 Mat A = a->A, B = a->B; 1418 PetscLogDouble isend[5], irecv[5]; 1419 1420 PetscFunctionBegin; 1421 info->block_size = (PetscReal)matin->rmap->bs; 1422 1423 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1424 1425 isend[0] = info->nz_used; 1426 isend[1] = info->nz_allocated; 1427 isend[2] = info->nz_unneeded; 1428 isend[3] = info->memory; 1429 isend[4] = info->mallocs; 1430 1431 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1432 1433 isend[0] += info->nz_used; 1434 isend[1] += info->nz_allocated; 1435 isend[2] += info->nz_unneeded; 1436 isend[3] += info->memory; 1437 isend[4] += info->mallocs; 1438 if (flag == MAT_LOCAL) { 1439 info->nz_used = isend[0]; 1440 info->nz_allocated = isend[1]; 1441 info->nz_unneeded = isend[2]; 1442 info->memory = isend[3]; 1443 info->mallocs = isend[4]; 1444 } else if (flag == MAT_GLOBAL_MAX) { 1445 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1446 1447 info->nz_used = irecv[0]; 1448 info->nz_allocated = irecv[1]; 1449 info->nz_unneeded = irecv[2]; 1450 info->memory = irecv[3]; 1451 info->mallocs = irecv[4]; 1452 } else if (flag == MAT_GLOBAL_SUM) { 1453 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1454 1455 info->nz_used = irecv[0]; 1456 info->nz_allocated = irecv[1]; 1457 info->nz_unneeded = irecv[2]; 1458 info->memory = irecv[3]; 1459 info->mallocs = irecv[4]; 1460 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1461 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1462 info->fill_ratio_needed = 0; 1463 info->factor_mallocs = 0; 1464 PetscFunctionReturn(PETSC_SUCCESS); 1465 } 1466 1467 PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg) 1468 { 1469 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1470 Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data; 1471 1472 PetscFunctionBegin; 1473 switch (op) { 1474 case MAT_NEW_NONZERO_LOCATIONS: 1475 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1476 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1477 case MAT_KEEP_NONZERO_PATTERN: 1478 case MAT_SUBMAT_SINGLEIS: 1479 case MAT_NEW_NONZERO_LOCATION_ERR: 1480 MatCheckPreallocated(A, 1); 1481 PetscCall(MatSetOption(a->A, op, flg)); 1482 PetscCall(MatSetOption(a->B, op, flg)); 1483 break; 1484 case MAT_ROW_ORIENTED: 1485 MatCheckPreallocated(A, 1); 1486 a->roworiented = flg; 1487 1488 PetscCall(MatSetOption(a->A, op, flg)); 1489 PetscCall(MatSetOption(a->B, op, flg)); 1490 break; 1491 case MAT_FORCE_DIAGONAL_ENTRIES: 1492 case MAT_SORTED_FULL: 1493 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1494 break; 1495 case MAT_IGNORE_OFF_PROC_ENTRIES: 1496 a->donotstash = flg; 1497 break; 1498 case MAT_USE_HASH_TABLE: 1499 a->ht_flag = flg; 1500 break; 1501 case MAT_HERMITIAN: 1502 MatCheckPreallocated(A, 1); 1503 PetscCall(MatSetOption(a->A, op, flg)); 1504 #if defined(PETSC_USE_COMPLEX) 1505 if (flg) { /* need different mat-vec ops */ 1506 A->ops->mult = MatMult_MPISBAIJ_Hermitian; 1507 A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian; 1508 A->ops->multtranspose = NULL; 1509 A->ops->multtransposeadd = NULL; 1510 A->symmetric = PETSC_BOOL3_FALSE; 1511 } 1512 #endif 1513 break; 1514 case MAT_SPD: 1515 case MAT_SYMMETRIC: 1516 MatCheckPreallocated(A, 1); 1517 PetscCall(MatSetOption(a->A, op, flg)); 1518 #if defined(PETSC_USE_COMPLEX) 1519 if (flg) { /* restore to use default mat-vec ops */ 1520 A->ops->mult = 
MatMult_MPISBAIJ; 1521 A->ops->multadd = MatMultAdd_MPISBAIJ; 1522 A->ops->multtranspose = MatMult_MPISBAIJ; 1523 A->ops->multtransposeadd = MatMultAdd_MPISBAIJ; 1524 } 1525 #endif 1526 break; 1527 case MAT_STRUCTURALLY_SYMMETRIC: 1528 MatCheckPreallocated(A, 1); 1529 PetscCall(MatSetOption(a->A, op, flg)); 1530 break; 1531 case MAT_SYMMETRY_ETERNAL: 1532 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1533 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric"); 1534 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1535 break; 1536 case MAT_SPD_ETERNAL: 1537 break; 1538 case MAT_IGNORE_LOWER_TRIANGULAR: 1539 aA->ignore_ltriangular = flg; 1540 break; 1541 case MAT_ERROR_LOWER_TRIANGULAR: 1542 aA->ignore_ltriangular = flg; 1543 break; 1544 case MAT_GETROW_UPPERTRIANGULAR: 1545 aA->getrow_utriangular = flg; 1546 break; 1547 default: 1548 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1549 } 1550 PetscFunctionReturn(PETSC_SUCCESS); 1551 } 1552 1553 PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B) 1554 { 1555 PetscFunctionBegin; 1556 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B)); 1557 if (reuse == MAT_INITIAL_MATRIX) { 1558 PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B)); 1559 } else if (reuse == MAT_REUSE_MATRIX) { 1560 PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN)); 1561 } 1562 PetscFunctionReturn(PETSC_SUCCESS); 1563 } 1564 1565 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr) 1566 { 1567 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data; 1568 Mat a = baij->A, b = baij->B; 1569 PetscInt nv, m, n; 1570 PetscBool flg; 1571 1572 PetscFunctionBegin; 1573 if (ll != rr) { 1574 PetscCall(VecEqual(ll, rr, &flg)); 1575 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same"); 1576 } 1577 if (!ll) PetscFunctionReturn(PETSC_SUCCESS); 1578 1579 PetscCall(MatGetLocalSize(mat, &m, &n)); 1580 PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n); 1581 1582 PetscCall(VecGetLocalSize(rr, &nv)); 1583 PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size"); 1584 1585 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1586 1587 /* left diagonalscale the off-diagonal part */ 1588 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1589 1590 /* scale the diagonal part */ 1591 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1592 1593 /* right diagonalscale the off-diagonal part */ 1594 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1595 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1596 PetscFunctionReturn(PETSC_SUCCESS); 1597 } 1598 1599 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A) 1600 { 1601 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1602 1603 PetscFunctionBegin; 1604 PetscCall(MatSetUnfactored(a->A)); 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *); 1609 1610 PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag) 1611 { 1612 Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data; 1613 Mat a, b, c, d; 1614 PetscBool flg; 1615 1616 PetscFunctionBegin; 1617 a = matA->A; 1618 b = matA->B; 1619 c = matB->A; 1620 d = matB->B; 1621 1622 PetscCall(MatEqual(a, c, &flg)); 1623 if (flg) 
PetscCall(MatEqual(b, d, &flg)); 1624 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1625 PetscFunctionReturn(PETSC_SUCCESS); 1626 } 1627 1628 PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str) 1629 { 1630 PetscBool isbaij; 1631 1632 PetscFunctionBegin; 1633 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, "")); 1634 PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name); 1635 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1636 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1637 PetscCall(MatGetRowUpperTriangular(A)); 1638 PetscCall(MatCopy_Basic(A, B, str)); 1639 PetscCall(MatRestoreRowUpperTriangular(A)); 1640 } else { 1641 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1642 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1643 1644 PetscCall(MatCopy(a->A, b->A, str)); 1645 PetscCall(MatCopy(a->B, b->B, str)); 1646 } 1647 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1648 PetscFunctionReturn(PETSC_SUCCESS); 1649 } 1650 1651 PetscErrorCode MatSetUp_MPISBAIJ(Mat A) 1652 { 1653 PetscFunctionBegin; 1654 PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1655 PetscFunctionReturn(PETSC_SUCCESS); 1656 } 1657 1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1659 { 1660 Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data; 1661 PetscBLASInt bnz, one = 1; 1662 Mat_SeqSBAIJ *xa, *ya; 1663 Mat_SeqBAIJ *xb, *yb; 1664 1665 PetscFunctionBegin; 1666 if (str == SAME_NONZERO_PATTERN) { 1667 PetscScalar alpha = a; 1668 xa = (Mat_SeqSBAIJ *)xx->A->data; 1669 ya = (Mat_SeqSBAIJ *)yy->A->data; 1670 PetscCall(PetscBLASIntCast(xa->nz, &bnz)); 1671 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one)); 1672 xb = (Mat_SeqBAIJ *)xx->B->data; 1673 yb = (Mat_SeqBAIJ *)yy->B->data; 1674 PetscCall(PetscBLASIntCast(xb->nz, &bnz)); 1675 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one)); 1676 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1677 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1678 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE)); 1679 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1680 PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 1681 } else { 1682 Mat B; 1683 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1684 PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size"); 1685 PetscCall(MatGetRowUpperTriangular(X)); 1686 PetscCall(MatGetRowUpperTriangular(Y)); 1687 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1688 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1689 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1690 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1691 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1692 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1693 PetscCall(MatSetType(B, MATMPISBAIJ)); 1694 PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d)); 1695 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1696 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1697 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1698 
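/* B now holds Y + a*X assembled on the union of the two nonzero patterns; MatHeaderMerge() below moves B's data into Y and destroys the temporary B */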
PetscCall(MatHeaderMerge(Y, &B)); 1699 PetscCall(PetscFree(nnz_d)); 1700 PetscCall(PetscFree(nnz_o)); 1701 PetscCall(MatRestoreRowUpperTriangular(X)); 1702 PetscCall(MatRestoreRowUpperTriangular(Y)); 1703 } 1704 PetscFunctionReturn(PETSC_SUCCESS); 1705 } 1706 1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[]) 1708 { 1709 PetscInt i; 1710 PetscBool flg; 1711 1712 PetscFunctionBegin; 1713 PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */ 1714 for (i = 0; i < n; i++) { 1715 PetscCall(ISEqual(irow[i], icol[i], &flg)); 1716 if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i])); 1717 } 1718 PetscFunctionReturn(PETSC_SUCCESS); 1719 } 1720 1721 PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a) 1722 { 1723 Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data; 1724 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data; 1725 1726 PetscFunctionBegin; 1727 if (!Y->preallocated) { 1728 PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 1729 } else if (!aij->nz) { 1730 PetscInt nonew = aij->nonew; 1731 PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 1732 aij->nonew = nonew; 1733 } 1734 PetscCall(MatShift_Basic(Y, a)); 1735 PetscFunctionReturn(PETSC_SUCCESS); 1736 } 1737 1738 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d) 1739 { 1740 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 1741 1742 PetscFunctionBegin; 1743 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 1744 PetscCall(MatMissingDiagonal(a->A, missing, d)); 1745 if (d) { 1746 PetscInt rstart; 1747 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1748 *d += rstart / A->rmap->bs; 1749 } 1750 PetscFunctionReturn(PETSC_SUCCESS); 1751 } 1752 1753 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a) 1754 { 1755 PetscFunctionBegin; 1756 *a = ((Mat_MPISBAIJ *)A->data)->A; 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 /* -------------------------------------------------------------------*/ 1761 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ, 1762 MatGetRow_MPISBAIJ, 1763 MatRestoreRow_MPISBAIJ, 1764 MatMult_MPISBAIJ, 1765 /* 4*/ MatMultAdd_MPISBAIJ, 1766 MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */ 1767 MatMultAdd_MPISBAIJ, 1768 NULL, 1769 NULL, 1770 NULL, 1771 /* 10*/ NULL, 1772 NULL, 1773 NULL, 1774 MatSOR_MPISBAIJ, 1775 MatTranspose_MPISBAIJ, 1776 /* 15*/ MatGetInfo_MPISBAIJ, 1777 MatEqual_MPISBAIJ, 1778 MatGetDiagonal_MPISBAIJ, 1779 MatDiagonalScale_MPISBAIJ, 1780 MatNorm_MPISBAIJ, 1781 /* 20*/ MatAssemblyBegin_MPISBAIJ, 1782 MatAssemblyEnd_MPISBAIJ, 1783 MatSetOption_MPISBAIJ, 1784 MatZeroEntries_MPISBAIJ, 1785 /* 24*/ NULL, 1786 NULL, 1787 NULL, 1788 NULL, 1789 NULL, 1790 /* 29*/ MatSetUp_MPISBAIJ, 1791 NULL, 1792 NULL, 1793 MatGetDiagonalBlock_MPISBAIJ, 1794 NULL, 1795 /* 34*/ MatDuplicate_MPISBAIJ, 1796 NULL, 1797 NULL, 1798 NULL, 1799 NULL, 1800 /* 39*/ MatAXPY_MPISBAIJ, 1801 MatCreateSubMatrices_MPISBAIJ, 1802 MatIncreaseOverlap_MPISBAIJ, 1803 MatGetValues_MPISBAIJ, 1804 MatCopy_MPISBAIJ, 1805 /* 44*/ NULL, 1806 MatScale_MPISBAIJ, 1807 MatShift_MPISBAIJ, 1808 NULL, 1809 NULL, 1810 /* 49*/ NULL, 1811 NULL, 1812 NULL, 1813 NULL, 1814 NULL, 1815 /* 54*/ NULL, 1816 NULL, 1817 MatSetUnfactored_MPISBAIJ, 1818 NULL, 1819 MatSetValuesBlocked_MPISBAIJ, 1820 /* 59*/ MatCreateSubMatrix_MPISBAIJ, 1821 NULL, 1822 NULL, 1823 NULL, 1824 NULL, 1825 /* 64*/ 
NULL, 1826 NULL, 1827 NULL, 1828 NULL, 1829 NULL, 1830 /* 69*/ MatGetRowMaxAbs_MPISBAIJ, 1831 NULL, 1832 MatConvert_MPISBAIJ_Basic, 1833 NULL, 1834 NULL, 1835 /* 74*/ NULL, 1836 NULL, 1837 NULL, 1838 NULL, 1839 NULL, 1840 /* 79*/ NULL, 1841 NULL, 1842 NULL, 1843 NULL, 1844 MatLoad_MPISBAIJ, 1845 /* 84*/ NULL, 1846 NULL, 1847 NULL, 1848 NULL, 1849 NULL, 1850 /* 89*/ NULL, 1851 NULL, 1852 NULL, 1853 NULL, 1854 NULL, 1855 /* 94*/ NULL, 1856 NULL, 1857 NULL, 1858 NULL, 1859 NULL, 1860 /* 99*/ NULL, 1861 NULL, 1862 NULL, 1863 MatConjugate_MPISBAIJ, 1864 NULL, 1865 /*104*/ NULL, 1866 MatRealPart_MPISBAIJ, 1867 MatImaginaryPart_MPISBAIJ, 1868 MatGetRowUpperTriangular_MPISBAIJ, 1869 MatRestoreRowUpperTriangular_MPISBAIJ, 1870 /*109*/ NULL, 1871 NULL, 1872 NULL, 1873 NULL, 1874 MatMissingDiagonal_MPISBAIJ, 1875 /*114*/ NULL, 1876 NULL, 1877 NULL, 1878 NULL, 1879 NULL, 1880 /*119*/ NULL, 1881 NULL, 1882 NULL, 1883 NULL, 1884 NULL, 1885 /*124*/ NULL, 1886 NULL, 1887 NULL, 1888 NULL, 1889 NULL, 1890 /*129*/ NULL, 1891 NULL, 1892 NULL, 1893 NULL, 1894 NULL, 1895 /*134*/ NULL, 1896 NULL, 1897 NULL, 1898 NULL, 1899 NULL, 1900 /*139*/ MatSetBlockSizes_Default, 1901 NULL, 1902 NULL, 1903 NULL, 1904 NULL, 1905 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ, 1906 NULL, 1907 NULL, 1908 NULL, 1909 NULL, 1910 NULL, 1911 /*150*/ NULL, 1912 NULL}; 1913 1914 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 1915 { 1916 Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data; 1917 PetscInt i, mbs, Mbs; 1918 PetscMPIInt size; 1919 1920 PetscFunctionBegin; 1921 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 1922 PetscCall(PetscLayoutSetUp(B->rmap)); 1923 PetscCall(PetscLayoutSetUp(B->cmap)); 1924 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 1925 PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N); 1926 PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n); 1927 1928 mbs = B->rmap->n / bs; 1929 Mbs = B->rmap->N / bs; 1930 PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs); 1931 1932 B->rmap->bs = bs; 1933 b->bs2 = bs * bs; 1934 b->mbs = mbs; 1935 b->Mbs = Mbs; 1936 b->nbs = B->cmap->n / bs; 1937 b->Nbs = B->cmap->N / bs; 1938 1939 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 1940 b->rstartbs = B->rmap->rstart / bs; 1941 b->rendbs = B->rmap->rend / bs; 1942 1943 b->cstartbs = B->cmap->rstart / bs; 1944 b->cendbs = B->cmap->rend / bs; 1945 1946 #if defined(PETSC_USE_CTABLE) 1947 PetscCall(PetscHMapIDestroy(&b->colmap)); 1948 #else 1949 PetscCall(PetscFree(b->colmap)); 1950 #endif 1951 PetscCall(PetscFree(b->garray)); 1952 PetscCall(VecDestroy(&b->lvec)); 1953 PetscCall(VecScatterDestroy(&b->Mvctx)); 1954 PetscCall(VecDestroy(&b->slvec0)); 1955 PetscCall(VecDestroy(&b->slvec0b)); 1956 PetscCall(VecDestroy(&b->slvec1)); 1957 PetscCall(VecDestroy(&b->slvec1a)); 1958 PetscCall(VecDestroy(&b->slvec1b)); 1959 PetscCall(VecScatterDestroy(&b->sMvctx)); 1960 1961 /* Because the B will have been resized we simply destroy it and create a new one each time */ 1962 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), 
&size)); 1963 PetscCall(MatDestroy(&b->B)); 1964 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 1965 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 1966 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 1967 1968 if (!B->preallocated) { 1969 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 1970 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 1971 PetscCall(MatSetType(b->A, MATSEQSBAIJ)); 1972 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 1973 } 1974 1975 PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 1976 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 1977 1978 B->preallocated = PETSC_TRUE; 1979 B->was_assembled = PETSC_FALSE; 1980 B->assembled = PETSC_FALSE; 1981 PetscFunctionReturn(PETSC_SUCCESS); 1982 } 1983 1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 1985 { 1986 PetscInt m, rstart, cend; 1987 PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 1988 const PetscInt *JJ = NULL; 1989 PetscScalar *values = NULL; 1990 PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented; 1991 PetscBool nooffprocentries; 1992 1993 PetscFunctionBegin; 1994 PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs); 1995 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 1996 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 1997 PetscCall(PetscLayoutSetUp(B->rmap)); 1998 PetscCall(PetscLayoutSetUp(B->cmap)); 1999 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2000 m = B->rmap->n / bs; 2001 rstart = B->rmap->rstart / bs; 2002 cend = B->cmap->rend / bs; 2003 2004 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2005 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2006 for (i = 0; i < m; i++) { 2007 nz = ii[i + 1] - ii[i]; 2008 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2009 /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */ 2010 JJ = jj + ii[i]; 2011 bd = 0; 2012 for (j = 0; j < nz; j++) { 2013 if (*JJ >= i + rstart) break; 2014 JJ++; 2015 bd++; 2016 } 2017 d = 0; 2018 for (; j < nz; j++) { 2019 if (*JJ++ >= cend) break; 2020 d++; 2021 } 2022 d_nnz[i] = d; 2023 o_nnz[i] = nz - d - bd; 2024 nz = nz - bd; 2025 nz_max = PetscMax(nz_max, nz); 2026 } 2027 PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2028 PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE)); 2029 PetscCall(PetscFree2(d_nnz, o_nnz)); 2030 2031 values = (PetscScalar *)V; 2032 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2033 for (i = 0; i < m; i++) { 2034 PetscInt row = i + rstart; 2035 PetscInt ncols = ii[i + 1] - ii[i]; 2036 const PetscInt *icols = jj + ii[i]; 2037 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2038 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2039 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2040 } else { /* block ordering does not match so we can only insert one block at a time. 
*/ 2041 PetscInt j; 2042 for (j = 0; j < ncols; j++) { 2043 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2044 PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2045 } 2046 } 2047 } 2048 2049 if (!V) PetscCall(PetscFree(values)); 2050 nooffprocentries = B->nooffprocentries; 2051 B->nooffprocentries = PETSC_TRUE; 2052 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2053 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2054 B->nooffprocentries = nooffprocentries; 2055 2056 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2057 PetscFunctionReturn(PETSC_SUCCESS); 2058 } 2059 2060 /*MC 2061 MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices, 2062 based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of 2063 the matrix is stored. 2064 2065 For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you 2066 can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`); 2067 2068 Options Database Keys: 2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()` 2070 2071 Note: 2072 The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the 2073 diagonal portion of the matrix of each process has to less than or equal the number of columns. 2074 2075 Level: beginner 2076 2077 .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType` 2078 M*/ 2079 2080 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B) 2081 { 2082 Mat_MPISBAIJ *b; 2083 PetscBool flg = PETSC_FALSE; 2084 2085 PetscFunctionBegin; 2086 PetscCall(PetscNew(&b)); 2087 B->data = (void *)b; 2088 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2089 2090 B->ops->destroy = MatDestroy_MPISBAIJ; 2091 B->ops->view = MatView_MPISBAIJ; 2092 B->assembled = PETSC_FALSE; 2093 B->insertmode = NOT_SET_VALUES; 2094 2095 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2096 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2097 2098 /* build local table of row and column ownerships */ 2099 PetscCall(PetscMalloc1(b->size + 2, &b->rangebs)); 2100 2101 /* build cache for off array entries formed */ 2102 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2103 2104 b->donotstash = PETSC_FALSE; 2105 b->colmap = NULL; 2106 b->garray = NULL; 2107 b->roworiented = PETSC_TRUE; 2108 2109 /* stuff used in block assembly */ 2110 b->barray = NULL; 2111 2112 /* stuff used for matrix vector multiply */ 2113 b->lvec = NULL; 2114 b->Mvctx = NULL; 2115 b->slvec0 = NULL; 2116 b->slvec0b = NULL; 2117 b->slvec1 = NULL; 2118 b->slvec1a = NULL; 2119 b->slvec1b = NULL; 2120 b->sMvctx = NULL; 2121 2122 /* stuff for MatGetRow() */ 2123 b->rowindices = NULL; 2124 b->rowvalues = NULL; 2125 b->getrowactive = PETSC_FALSE; 2126 2127 /* hash table stuff */ 2128 b->ht = NULL; 2129 b->hd = NULL; 2130 b->ht_size = 0; 2131 b->ht_flag = PETSC_FALSE; 2132 b->ht_fact = 0; 2133 b->ht_total_ct = 0; 2134 b->ht_insert_ct = 0; 2135 2136 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2137 b->ijonly = PETSC_FALSE; 2138 2139 b->in_loc = NULL; 2140 b->v_loc = NULL; 2141 b->n_loc = 0; 2142 2143 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ)); 2144 
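  /* the compositions below register the type-specific retrieve, preallocation, and conversion routines so that the
     generic interfaces (MatRetrieveValues(), MatMPISBAIJSetPreallocation(), MatConvert()) can dispatch to this type */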
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ)); 2145 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ)); 2146 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ)); 2147 #if defined(PETSC_HAVE_ELEMENTAL) 2148 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental)); 2149 #endif 2150 #if defined(PETSC_HAVE_SCALAPACK) 2151 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK)); 2152 #endif 2153 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic)); 2154 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic)); 2155 2156 B->symmetric = PETSC_BOOL3_TRUE; 2157 B->structurally_symmetric = PETSC_BOOL3_TRUE; 2158 B->symmetry_eternal = PETSC_TRUE; 2159 B->structural_symmetry_eternal = PETSC_TRUE; 2160 #if defined(PETSC_USE_COMPLEX) 2161 B->hermitian = PETSC_BOOL3_FALSE; 2162 #else 2163 B->hermitian = PETSC_BOOL3_TRUE; 2164 #endif 2165 2166 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ)); 2167 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat"); 2168 PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL)); 2169 if (flg) { 2170 PetscReal fact = 1.39; 2171 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2172 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2173 if (fact <= 1.0) fact = 1.39; 2174 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2175 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2176 } 2177 PetscOptionsEnd(); 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 /*MC 2182 MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices. 2183 2184 This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator, 2185 and `MATMPISBAIJ` otherwise. 2186 2187 Options Database Key: 2188 . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()` 2189 2190 Level: beginner 2191 2192 .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ` 2193 M*/ 2194 2195 /*@C 2196 MatMPISBAIJSetPreallocation - For good matrix assembly performance 2197 the user should preallocate the matrix storage by setting the parameters 2198 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2199 performance can be increased by more than a factor of 50. 2200 2201 Collective 2202 2203 Input Parameters: 2204 + B - the matrix 2205 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 2206 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2207 . d_nz - number of block nonzeros per block row in diagonal portion of local 2208 submatrix (same for all local rows) 2209 . 
d_nnz - array containing the number of block nonzeros in the various block rows
         in the upper triangular and diagonal part of the diagonal portion of the local submatrix
         (possibly different for each block row) or NULL. If you plan to factor the matrix you must leave room
         for the diagonal entry and set a value even if it is zero.
. o_nz - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix that is right of the diagonal
          (possibly different for each block row) or NULL.

   Options Database Keys:
+ -mat_no_unroll - uses code that does not unroll the loops in the
                   block calculations (much slower)
- -mat_block_size - size of the blocks to use

   Notes:
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.

   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
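   Example usage (a sketch; the sizes and nonzero estimates below are illustrative assumptions, not taken from an existing PETSc example):
.vb
   Mat      A;
   PetscInt bs = 2, m = 10;   /* 10 local rows = 5 local block rows of block size 2 */

   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetSizes(A, m, m, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(A, MATMPISBAIJ));
   PetscCall(MatSetFromOptions(A));
   /* at most 3 block nonzeros per block row in the upper triangle of the diagonal portion,
      at most 1 block nonzero per block row in the off-diagonal portion */
   PetscCall(MatMPISBAIJSetPreallocation(A, bs, 3, NULL, 1, NULL));
.ve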
2273 2274 Level: intermediate 2275 2276 .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()` 2277 @*/ 2278 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2279 { 2280 PetscFunctionBegin; 2281 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2282 PetscValidType(B, 1); 2283 PetscValidLogicalCollectiveInt(B, bs, 2); 2284 PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2285 PetscFunctionReturn(PETSC_SUCCESS); 2286 } 2287 2288 /*@C 2289 MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`, 2290 (block compressed row). For good matrix assembly performance 2291 the user should preallocate the matrix storage by setting the parameters 2292 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2293 performance can be increased by more than a factor of 50. 2294 2295 Collective 2296 2297 Input Parameters: 2298 + comm - MPI communicator 2299 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2300 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2301 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2302 This value should be the same as the local size used in creating the 2303 y vector for the matrix-vector product y = Ax. 2304 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2305 This value should be the same as the local size used in creating the 2306 x vector for the matrix-vector product y = Ax. 2307 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2308 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 2309 . d_nz - number of block nonzeros per block row in diagonal portion of local 2310 submatrix (same for all local rows) 2311 . d_nnz - array containing the number of block nonzeros in the various block rows 2312 in the upper triangular portion of the in diagonal portion of the local 2313 (possibly different for each block block row) or NULL. 2314 If you plan to factor the matrix you must leave room for the diagonal entry and 2315 set its value even if it is zero. 2316 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2317 submatrix (same for all local rows). 2318 - o_nnz - array containing the number of nonzeros in the various block rows of the 2319 off-diagonal portion of the local submatrix (possibly different for 2320 each block row) or NULL. 2321 2322 Output Parameter: 2323 . A - the matrix 2324 2325 Options Database Keys: 2326 + -mat_no_unroll - uses code that does not unroll the loops in the 2327 block calculations (much slower) 2328 . -mat_block_size - size of the blocks to use 2329 - -mat_mpi - use the parallel matrix data structures even on one processor 2330 (defaults to using SeqBAIJ format on one processor) 2331 2332 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 2333 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
[MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   Notes:
   The number of rows and columns must be divisible by the block size.
   This matrix type does not support complex Hermitian operation.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |. . . d d d o o o o  o  o
   row 4  |. . . d d d o o o o  o  o
   row 5  |. . . d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d matrix is stored in
   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.

   Now d_nz should indicate the number of block nonzeros per row in the upper triangular
   plus the diagonal part of the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
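   Example usage (a sketch; the dimensions and nonzero estimates are illustrative assumptions, not taken from an existing PETSc example):
.vb
   Mat A;

   /* block size 2, 8 local rows and columns, global sizes determined from the local ones,
      at most 3 upper-triangular block nonzeros per block row in the diagonal portion and
      at most 1 block nonzero per block row in the off-diagonal portion */
   PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, 8, 8, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 1, NULL, &A));
   /* ... set entries with MatSetValuesBlocked(), assemble, use the matrix ... */
   PetscCall(MatDestroy(&A));
.ve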
2384 2385 Level: intermediate 2386 2387 .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()` 2388 @*/ 2389 2390 PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 2391 { 2392 PetscMPIInt size; 2393 2394 PetscFunctionBegin; 2395 PetscCall(MatCreate(comm, A)); 2396 PetscCall(MatSetSizes(*A, m, n, M, N)); 2397 PetscCallMPI(MPI_Comm_size(comm, &size)); 2398 if (size > 1) { 2399 PetscCall(MatSetType(*A, MATMPISBAIJ)); 2400 PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 2401 } else { 2402 PetscCall(MatSetType(*A, MATSEQSBAIJ)); 2403 PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 2404 } 2405 PetscFunctionReturn(PETSC_SUCCESS); 2406 } 2407 2408 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2409 { 2410 Mat mat; 2411 Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data; 2412 PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs; 2413 PetscScalar *array; 2414 2415 PetscFunctionBegin; 2416 *newmat = NULL; 2417 2418 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2419 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2420 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2421 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2422 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2423 2424 mat->factortype = matin->factortype; 2425 mat->preallocated = PETSC_TRUE; 2426 mat->assembled = PETSC_TRUE; 2427 mat->insertmode = NOT_SET_VALUES; 2428 2429 a = (Mat_MPISBAIJ *)mat->data; 2430 a->bs2 = oldmat->bs2; 2431 a->mbs = oldmat->mbs; 2432 a->nbs = oldmat->nbs; 2433 a->Mbs = oldmat->Mbs; 2434 a->Nbs = oldmat->Nbs; 2435 2436 a->size = oldmat->size; 2437 a->rank = oldmat->rank; 2438 a->donotstash = oldmat->donotstash; 2439 a->roworiented = oldmat->roworiented; 2440 a->rowindices = NULL; 2441 a->rowvalues = NULL; 2442 a->getrowactive = PETSC_FALSE; 2443 a->barray = NULL; 2444 a->rstartbs = oldmat->rstartbs; 2445 a->rendbs = oldmat->rendbs; 2446 a->cstartbs = oldmat->cstartbs; 2447 a->cendbs = oldmat->cendbs; 2448 2449 /* hash table stuff */ 2450 a->ht = NULL; 2451 a->hd = NULL; 2452 a->ht_size = 0; 2453 a->ht_flag = oldmat->ht_flag; 2454 a->ht_fact = oldmat->ht_fact; 2455 a->ht_total_ct = 0; 2456 a->ht_insert_ct = 0; 2457 2458 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2)); 2459 if (oldmat->colmap) { 2460 #if defined(PETSC_USE_CTABLE) 2461 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2462 #else 2463 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 2464 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 2465 #endif 2466 } else a->colmap = NULL; 2467 2468 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 2469 PetscCall(PetscMalloc1(len, &a->garray)); 2470 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2471 } else a->garray = NULL; 2472 2473 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 2474 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 2475 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 2476 2477 PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0)); 2478 PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1)); 2479 2480 PetscCall(VecGetLocalSize(a->slvec1, &nt)); 2481 PetscCall(VecGetArray(a->slvec1, &array)); 2482 
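  /* slvec1a and slvec1b below are sequential views of the first bs*mbs entries (local diagonal part) and the
     remaining entries of slvec1; they share slvec1's storage rather than allocating new arrays (likewise slvec0b for slvec0) */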
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a)); 2483 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b)); 2484 PetscCall(VecRestoreArray(a->slvec1, &array)); 2485 PetscCall(VecGetArray(a->slvec0, &array)); 2486 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b)); 2487 PetscCall(VecRestoreArray(a->slvec0, &array)); 2488 2489 /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */ 2490 PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx)); 2491 a->sMvctx = oldmat->sMvctx; 2492 2493 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 2494 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 2495 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 2496 *newmat = mat; 2497 PetscFunctionReturn(PETSC_SUCCESS); 2498 } 2499 2500 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 2501 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary 2502 2503 PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer) 2504 { 2505 PetscBool isbinary; 2506 2507 PetscFunctionBegin; 2508 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 2509 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 2510 PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer)); 2511 PetscFunctionReturn(PETSC_SUCCESS); 2512 } 2513 2514 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[]) 2515 { 2516 Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data; 2517 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data; 2518 PetscReal atmp; 2519 PetscReal *work, *svalues, *rvalues; 2520 PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol; 2521 PetscMPIInt rank, size; 2522 PetscInt *rowners_bs, dest, count, source; 2523 PetscScalar *va; 2524 MatScalar *ba; 2525 MPI_Status stat; 2526 2527 PetscFunctionBegin; 2528 PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov"); 2529 PetscCall(MatGetRowMaxAbs(a->A, v, NULL)); 2530 PetscCall(VecGetArray(v, &va)); 2531 2532 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2533 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2534 2535 bs = A->rmap->bs; 2536 mbs = a->mbs; 2537 Mbs = a->Mbs; 2538 ba = b->a; 2539 bi = b->i; 2540 bj = b->j; 2541 2542 /* find ownerships */ 2543 rowners_bs = A->rmap->range; 2544 2545 /* each proc creates an array to be distributed */ 2546 PetscCall(PetscCalloc1(bs * Mbs, &work)); 2547 2548 /* row_max for B */ 2549 if (rank != size - 1) { 2550 for (i = 0; i < mbs; i++) { 2551 ncols = bi[1] - bi[0]; 2552 bi++; 2553 brow = bs * i; 2554 for (j = 0; j < ncols; j++) { 2555 bcol = bs * (*bj); 2556 for (kcol = 0; kcol < bs; kcol++) { 2557 col = bcol + kcol; /* local col index */ 2558 col += rowners_bs[rank + 1]; /* global col index */ 2559 for (krow = 0; krow < bs; krow++) { 2560 atmp = PetscAbsScalar(*ba); 2561 ba++; 2562 row = brow + krow; /* local row index */ 2563 if (PetscRealPart(va[row]) < atmp) va[row] = atmp; 2564 if (work[col] < atmp) work[col] = atmp; 2565 } 2566 } 2567 bj++; 2568 } 2569 } 2570 2571 /* send values to its owners */ 2572 for (dest = rank + 1; dest < size; dest++) { 2573 svalues = work + rowners_bs[dest]; 2574 count = rowners_bs[dest + 1] - rowners_bs[dest]; 2575 
PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A))); 2576 } 2577 } 2578 2579 /* receive values */ 2580 if (rank) { 2581 rvalues = work; 2582 count = rowners_bs[rank + 1] - rowners_bs[rank]; 2583 for (source = 0; source < rank; source++) { 2584 PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat)); 2585 /* process values */ 2586 for (i = 0; i < count; i++) { 2587 if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i]; 2588 } 2589 } 2590 } 2591 2592 PetscCall(VecRestoreArray(v, &va)); 2593 PetscCall(PetscFree(work)); 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2598 { 2599 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data; 2600 PetscInt mbs = mat->mbs, bs = matin->rmap->bs; 2601 PetscScalar *x, *ptr, *from; 2602 Vec bb1; 2603 const PetscScalar *b; 2604 2605 PetscFunctionBegin; 2606 PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 2607 PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented"); 2608 2609 if (flag == SOR_APPLY_UPPER) { 2610 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2611 PetscFunctionReturn(PETSC_SUCCESS); 2612 } 2613 2614 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2615 if (flag & SOR_ZERO_INITIAL_GUESS) { 2616 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx)); 2617 its--; 2618 } 2619 2620 PetscCall(VecDuplicate(bb, &bb1)); 2621 while (its--) { 2622 /* lower triangular part: slvec0b = - B^T*xx */ 2623 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2624 2625 /* copy xx into slvec0a */ 2626 PetscCall(VecGetArray(mat->slvec0, &ptr)); 2627 PetscCall(VecGetArray(xx, &x)); 2628 PetscCall(PetscArraycpy(ptr, x, bs * mbs)); 2629 PetscCall(VecRestoreArray(mat->slvec0, &ptr)); 2630 2631 PetscCall(VecScale(mat->slvec0, -1.0)); 2632 2633 /* copy bb into slvec1a */ 2634 PetscCall(VecGetArray(mat->slvec1, &ptr)); 2635 PetscCall(VecGetArrayRead(bb, &b)); 2636 PetscCall(PetscArraycpy(ptr, b, bs * mbs)); 2637 PetscCall(VecRestoreArray(mat->slvec1, &ptr)); 2638 2639 /* set slvec1b = 0 */ 2640 PetscCall(VecSet(mat->slvec1b, 0.0)); 2641 2642 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2643 PetscCall(VecRestoreArray(xx, &x)); 2644 PetscCall(VecRestoreArrayRead(bb, &b)); 2645 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2646 2647 /* upper triangular part: bb1 = bb1 - B*x */ 2648 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1)); 2649 2650 /* local diagonal sweep */ 2651 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx)); 2652 } 2653 PetscCall(VecDestroy(&bb1)); 2654 } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2655 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2656 } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) { 2657 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2658 } else if (flag & SOR_EISENSTAT) { 2659 Vec xx1; 2660 PetscBool 
hasop; 2661 const PetscScalar *diag; 2662 PetscScalar *sl, scale = (omega - 2.0) / omega; 2663 PetscInt i, n; 2664 2665 if (!mat->xx1) { 2666 PetscCall(VecDuplicate(bb, &mat->xx1)); 2667 PetscCall(VecDuplicate(bb, &mat->bb1)); 2668 } 2669 xx1 = mat->xx1; 2670 bb1 = mat->bb1; 2671 2672 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 2673 2674 if (!mat->diag) { 2675 /* this is wrong for same matrix with new nonzero values */ 2676 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 2677 PetscCall(MatGetDiagonal(matin, mat->diag)); 2678 } 2679 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 2680 2681 if (hasop) { 2682 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 2683 PetscCall(VecAYPX(mat->slvec1a, scale, bb)); 2684 } else { 2685 /* 2686 These two lines are replaced by code that may be a bit faster for a good compiler 2687 PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx)); 2688 PetscCall(VecAYPX(mat->slvec1a,scale,bb)); 2689 */ 2690 PetscCall(VecGetArray(mat->slvec1a, &sl)); 2691 PetscCall(VecGetArrayRead(mat->diag, &diag)); 2692 PetscCall(VecGetArrayRead(bb, &b)); 2693 PetscCall(VecGetArray(xx, &x)); 2694 PetscCall(VecGetLocalSize(xx, &n)); 2695 if (omega == 1.0) { 2696 for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i]; 2697 PetscCall(PetscLogFlops(2.0 * n)); 2698 } else { 2699 for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i]; 2700 PetscCall(PetscLogFlops(3.0 * n)); 2701 } 2702 PetscCall(VecRestoreArray(mat->slvec1a, &sl)); 2703 PetscCall(VecRestoreArrayRead(mat->diag, &diag)); 2704 PetscCall(VecRestoreArrayRead(bb, &b)); 2705 PetscCall(VecRestoreArray(xx, &x)); 2706 } 2707 2708 /* multiply off-diagonal portion of matrix */ 2709 PetscCall(VecSet(mat->slvec1b, 0.0)); 2710 PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b)); 2711 PetscCall(VecGetArray(mat->slvec0, &from)); 2712 PetscCall(VecGetArray(xx, &x)); 2713 PetscCall(PetscArraycpy(from, x, bs * mbs)); 2714 PetscCall(VecRestoreArray(mat->slvec0, &from)); 2715 PetscCall(VecRestoreArray(xx, &x)); 2716 PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2717 PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD)); 2718 PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a)); 2719 2720 /* local sweep */ 2721 PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 2722 PetscCall(VecAXPY(xx, 1.0, xx1)); 2723 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format"); 2724 PetscFunctionReturn(PETSC_SUCCESS); 2725 } 2726 2727 /*@ 2728 MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard 2729 CSR format the local rows. 2730 2731 Collective 2732 2733 Input Parameters: 2734 + comm - MPI communicator 2735 . bs - the block size, only a block size of 1 is supported 2736 . m - number of local rows (Cannot be `PETSC_DECIDE`) 2737 . n - This value should be the same as the local size used in creating the 2738 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 2739 calculated if N is given) For square matrices n is almost always m. 2740 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 2741 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
. j - column indices
- a - matrix values

   Output Parameter:
. mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+ B - the matrix
. bs - the block size
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero); these must be sorted for each row
- v - optional values in the matrix

   Level: advanced

   Notes:
   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
   and usually the numerical values as well.

   Any entries below the diagonal are ignored.

.seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
@*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
    PetscInt     *dnz, *onz, mbs, Nbs, nbs;
    PetscInt     *bindx, rmax = a->rmax, j;
    PetscMPIInt   rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATSBAIJ));
    PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i +
rstart; 2861 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 2862 PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values)); 2863 } 2864 PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE)); 2865 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 2866 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 2867 PetscFunctionReturn(PETSC_SUCCESS); 2868 } 2869
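/*
   A minimal usage sketch for MatCreateMPISBAIJWithArrays(); the CSR data below is a made-up 2x2 upper-triangular
   example with bs = 1, intended to be run on a single MPI rank:

      PetscInt    i[] = {0, 2, 3};           // row 0 stores 2 entries, row 1 stores 1
      PetscInt    j[] = {0, 1, 1};           // only the upper triangle (column >= row) is given, sorted per row
      PetscScalar a[] = {4.0, 1.0, 3.0};
      Mat         S;

      PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &S));
      PetscCall(MatDestroy(&S));
*/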