1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *vv; 62 Vec vB, vA; 63 const PetscScalar *va, *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayRead(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(v, &vv)); 92 PetscCall(VecRestoreArrayRead(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)B->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 721 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 722 v = amat->a; 723 jj = amat->j; 724 for (i = 0; i < amat->nz; i++) { 725 for (j = 0; j < bs; j++) { 726 col = bs * (cstart + *jj) + j; /* column index */ 727 for (row = 0; row < bs; row++) { 728 tmp[col] += PetscAbsScalar(*v); 729 v++; 730 } 731 } 732 jj++; 733 } 734 v = bmat->a; 735 jj = bmat->j; 736 for (i = 0; i < bmat->nz; i++) { 737 for (j = 0; j < bs; j++) { 738 col = bs * garray[*jj] + j; 739 for (row = 0; row < bs; row++) { 740 tmp[col] += PetscAbsScalar(*v); 741 v++; 742 } 743 } 744 jj++; 745 } 746 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 747 *nrm = 0.0; 748 for (j = 0; j < mat->cmap->N; j++) { 749 if (tmp[j] > *nrm) *nrm = tmp[j]; 750 } 751 PetscCall(PetscFree(tmp)); 752 } else if (type == NORM_INFINITY) { /* max row sum */ 753 PetscReal *sums; 754 PetscCall(PetscMalloc1(bs, &sums)); 755 sum = 0.0; 756 for (j = 0; j < amat->mbs; j++) { 757 for (row = 0; row < bs; row++) sums[row] = 0.0; 758 v = amat->a + bs2 * amat->i[j]; 759 nz = amat->i[j + 1] - amat->i[j]; 760 for (i = 0; i < nz; i++) { 761 for (col = 0; col < bs; col++) { 762 for (row = 0; row < bs; row++) { 763 sums[row] += PetscAbsScalar(*v); 764 v++; 765 } 766 } 767 } 768 v = bmat->a + bs2 * bmat->i[j]; 769 nz = bmat->i[j + 1] - bmat->i[j]; 770 for (i = 0; i < nz; i++) { 771 for (col = 0; col < bs; col++) { 772 for (row = 0; row < bs; row++) { 773 sums[row] += PetscAbsScalar(*v); 774 v++; 775 } 776 } 777 } 778 for (row = 0; row < bs; row++) { 779 if (sums[row] > sum) sum = sums[row]; 780 } 781 } 782 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 783 PetscCall(PetscFree(sums)); 784 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 785 } 786 PetscFunctionReturn(PETSC_SUCCESS); 787 } 788 789 /* 790 Creates the hash table, and sets the table 791 This table is created only once. 792 If new entries need to be added to the matrix 793 then the hash table has to be destroyed and 794 recreated. 795 */ 796 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 797 { 798 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 799 Mat A = baij->A, B = baij->B; 800 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 801 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 802 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 803 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 804 PetscInt *HT, key; 805 MatScalar **HD; 806 PetscReal tmp; 807 #if defined(PETSC_USE_INFO) 808 PetscInt ct = 0, max = 0; 809 #endif 810 811 PetscFunctionBegin; 812 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 813 814 baij->ht_size = (PetscInt)(factor * nz); 815 ht_size = baij->ht_size; 816 817 /* Allocate Memory for Hash Table */ 818 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 819 HD = baij->hd; 820 HT = baij->ht; 821 822 /* Loop Over A */ 823 for (i = 0; i < a->mbs; i++) { 824 for (j = ai[i]; j < ai[i + 1]; j++) { 825 row = i + rstart; 826 col = aj[j] + cstart; 827 828 key = row * Nbs + col + 1; 829 h1 = HASH(ht_size, key, tmp); 830 for (k = 0; k < ht_size; k++) { 831 if (!HT[(h1 + k) % ht_size]) { 832 HT[(h1 + k) % ht_size] = key; 833 HD[(h1 + k) % ht_size] = a->a + j * bs2; 834 break; 835 #if defined(PETSC_USE_INFO) 836 } else { 837 ct++; 838 #endif 839 } 840 } 841 #if defined(PETSC_USE_INFO) 842 if (k > max) max = k; 843 #endif 844 } 845 } 846 /* Loop Over B */ 847 for (i = 0; i < b->mbs; i++) { 848 for (j = bi[i]; j < bi[i + 1]; j++) { 849 row = i + rstart; 850 col = garray[bj[j]]; 851 key = row * Nbs + col + 1; 852 h1 = HASH(ht_size, key, tmp); 853 for (k = 0; k < ht_size; k++) { 854 if (!HT[(h1 + k) % ht_size]) { 855 HT[(h1 + k) % ht_size] = key; 856 HD[(h1 + k) % ht_size] = b->a + j * bs2; 857 break; 858 #if defined(PETSC_USE_INFO) 859 } else { 860 ct++; 861 #endif 862 } 863 } 864 #if defined(PETSC_USE_INFO) 865 if (k > max) max = k; 866 #endif 867 } 868 } 869 870 /* Print Summary */ 871 #if defined(PETSC_USE_INFO) 872 for (i = 0, j = 0; i < ht_size; i++) { 873 if (HT[i]) j++; 874 } 875 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max)); 876 #endif 877 PetscFunctionReturn(PETSC_SUCCESS); 878 } 879 880 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 881 { 882 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 883 PetscInt nstash, reallocs; 884 885 PetscFunctionBegin; 886 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 887 888 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 889 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 890 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 891 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 892 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 893 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 898 { 899 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 900 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 901 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 902 PetscInt *row, *col; 903 PetscBool r1, r2, r3, other_disassembled; 904 MatScalar *val; 905 PetscMPIInt n; 906 907 PetscFunctionBegin; 908 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 909 if (!baij->donotstash && !mat->nooffprocentries) { 910 while (1) { 911 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 912 if (!flg) break; 913 914 for (i = 0; i < n;) { 915 /* Now identify the consecutive vals belonging to the same row */ 916 for (j = i, rstart = row[j]; j < n; j++) { 917 if (row[j] != rstart) break; 918 } 919 if (j < n) ncols = j - i; 920 else ncols = n - i; 921 /* Now assemble all these values with a single function call */ 922 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 923 i = j; 924 } 925 } 926 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 927 /* Now process the block-stash. Since the values are stashed column-oriented, 928 set the row-oriented flag to column-oriented, and after MatSetValues() 929 restore the original flags */ 930 r1 = baij->roworiented; 931 r2 = a->roworiented; 932 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 933 934 baij->roworiented = PETSC_FALSE; 935 a->roworiented = PETSC_FALSE; 936 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 937 while (1) { 938 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 939 if (!flg) break; 940 941 for (i = 0; i < n;) { 942 /* Now identify the consecutive vals belonging to the same row */ 943 for (j = i, rstart = row[j]; j < n; j++) { 944 if (row[j] != rstart) break; 945 } 946 if (j < n) ncols = j - i; 947 else ncols = n - i; 948 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 949 i = j; 950 } 951 } 952 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 953 954 baij->roworiented = r1; 955 a->roworiented = r2; 956 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 957 } 958 959 PetscCall(MatAssemblyBegin(baij->A, mode)); 960 PetscCall(MatAssemblyEnd(baij->A, mode)); 961 962 /* determine if any processor has disassembled, if so we must 963 also disassemble ourselves, in order that we may reassemble. */ 964 /* 965 if nonzero structure of submatrix B cannot change then we know that 966 no processor disassembled thus we can skip this stuff 967 */ 968 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 969 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 970 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 971 } 972 973 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 974 PetscCall(MatAssemblyBegin(baij->B, mode)); 975 PetscCall(MatAssemblyEnd(baij->B, mode)); 976 977 #if defined(PETSC_USE_INFO) 978 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 979 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 980 981 baij->ht_total_ct = 0; 982 baij->ht_insert_ct = 0; 983 } 984 #endif 985 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 986 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 987 988 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 989 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 990 } 991 992 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 993 994 baij->rowvalues = NULL; 995 996 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 997 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 998 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 999 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1000 } 1001 PetscFunctionReturn(PETSC_SUCCESS); 1002 } 1003 1004 #include <petscdraw.h> 1005 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1006 { 1007 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1008 PetscMPIInt rank = baij->rank; 1009 PetscInt bs = mat->rmap->bs; 1010 PetscBool iascii, isdraw; 1011 PetscViewer sviewer; 1012 PetscViewerFormat format; 1013 1014 PetscFunctionBegin; 1015 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1016 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1017 if (iascii) { 1018 PetscCall(PetscViewerGetFormat(viewer, &format)); 1019 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1020 MatInfo info; 1021 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1022 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1023 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1024 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1025 mat->rmap->bs, info.memory)); 1026 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1027 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1028 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1030 PetscCall(PetscViewerFlush(viewer)); 1031 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1032 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1033 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1034 PetscFunctionReturn(PETSC_SUCCESS); 1035 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1036 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1037 PetscFunctionReturn(PETSC_SUCCESS); 1038 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } 1041 } 1042 1043 if (isdraw) { 1044 PetscDraw draw; 1045 PetscBool isnull; 1046 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1047 PetscCall(PetscDrawIsNull(draw, &isnull)); 1048 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1049 } 1050 1051 { 1052 /* assemble the entire matrix onto first processor. */ 1053 Mat A; 1054 Mat_SeqBAIJ *Aloc; 1055 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1056 MatScalar *a; 1057 const char *matname; 1058 1059 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1060 /* Perhaps this should be the type of mat? */ 1061 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1062 if (rank == 0) { 1063 PetscCall(MatSetSizes(A, M, N, M, N)); 1064 } else { 1065 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1066 } 1067 PetscCall(MatSetType(A, MATMPIBAIJ)); 1068 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1069 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1070 1071 /* copy over the A part */ 1072 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1073 ai = Aloc->i; 1074 aj = Aloc->j; 1075 a = Aloc->a; 1076 PetscCall(PetscMalloc1(bs, &rvals)); 1077 1078 for (i = 0; i < mbs; i++) { 1079 rvals[0] = bs * (baij->rstartbs + i); 1080 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1081 for (j = ai[i]; j < ai[i + 1]; j++) { 1082 col = (baij->cstartbs + aj[j]) * bs; 1083 for (k = 0; k < bs; k++) { 1084 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1085 col++; 1086 a += bs; 1087 } 1088 } 1089 } 1090 /* copy over the B part */ 1091 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1092 ai = Aloc->i; 1093 aj = Aloc->j; 1094 a = Aloc->a; 1095 for (i = 0; i < mbs; i++) { 1096 rvals[0] = bs * (baij->rstartbs + i); 1097 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1098 for (j = ai[i]; j < ai[i + 1]; j++) { 1099 col = baij->garray[aj[j]] * bs; 1100 for (k = 0; k < bs; k++) { 1101 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1102 col++; 1103 a += bs; 1104 } 1105 } 1106 } 1107 PetscCall(PetscFree(rvals)); 1108 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1109 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1110 /* 1111 Everyone has to call to draw the matrix since the graphics waits are 1112 synchronized across all processors that share the PetscDraw object 1113 */ 1114 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1115 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1116 if (rank == 0) { 1117 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1118 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1119 } 1120 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1121 PetscCall(MatDestroy(&A)); 1122 } 1123 PetscFunctionReturn(PETSC_SUCCESS); 1124 } 1125 1126 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1127 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1128 { 1129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1130 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1131 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1132 const PetscInt *garray = aij->garray; 1133 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1134 PetscCount nz, hnz; 1135 PetscInt *rowlens, *colidxs; 1136 PetscScalar *matvals; 1137 PetscMPIInt rank; 1138 1139 PetscFunctionBegin; 1140 PetscCall(PetscViewerSetUp(viewer)); 1141 1142 M = mat->rmap->N; 1143 N = mat->cmap->N; 1144 m = mat->rmap->n; 1145 rs = mat->rmap->rstart; 1146 cs = mat->cmap->rstart; 1147 bs = mat->rmap->bs; 1148 nz = bs * bs * (A->nz + B->nz); 1149 1150 /* write matrix header */ 1151 header[0] = MAT_FILE_CLASSID; 1152 header[1] = M; 1153 header[2] = N; 1154 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1155 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1156 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1157 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1158 1159 /* fill in and store row lengths */ 1160 PetscCall(PetscMalloc1(m, &rowlens)); 1161 for (cnt = 0, i = 0; i < A->mbs; i++) 1162 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1163 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1164 PetscCall(PetscFree(rowlens)); 1165 1166 /* fill in and store column indices */ 1167 PetscCall(PetscMalloc1(nz, &colidxs)); 1168 for (cnt = 0, i = 0; i < A->mbs; i++) { 1169 for (k = 0; k < bs; k++) { 1170 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1171 if (garray[B->j[jb]] > cs / bs) break; 1172 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1173 } 1174 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1175 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1176 for (; jb < B->i[i + 1]; jb++) 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1178 } 1179 } 1180 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1181 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1182 PetscCall(PetscFree(colidxs)); 1183 1184 /* fill in and store nonzero values */ 1185 PetscCall(PetscMalloc1(nz, &matvals)); 1186 for (cnt = 0, i = 0; i < A->mbs; i++) { 1187 for (k = 0; k < bs; k++) { 1188 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1189 if (garray[B->j[jb]] > cs / bs) break; 1190 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1191 } 1192 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1193 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1194 for (; jb < B->i[i + 1]; jb++) 1195 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1196 } 1197 } 1198 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1199 PetscCall(PetscFree(matvals)); 1200 1201 /* write block size option to the viewer's .info file */ 1202 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 } 1205 1206 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1207 { 1208 PetscBool iascii, isdraw, issocket, isbinary; 1209 1210 PetscFunctionBegin; 1211 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1212 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1213 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1214 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1215 if (iascii || isdraw || issocket) { 1216 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1217 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1218 PetscFunctionReturn(PETSC_SUCCESS); 1219 } 1220 1221 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1222 { 1223 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1224 PetscInt nt; 1225 1226 PetscFunctionBegin; 1227 PetscCall(VecGetLocalSize(xx, &nt)); 1228 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1229 PetscCall(VecGetLocalSize(yy, &nt)); 1230 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1231 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1232 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1233 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1234 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1235 PetscFunctionReturn(PETSC_SUCCESS); 1236 } 1237 1238 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1239 { 1240 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1241 1242 PetscFunctionBegin; 1243 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1244 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1245 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1246 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1251 { 1252 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1253 1254 PetscFunctionBegin; 1255 /* do nondiagonal part */ 1256 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1257 /* do local part */ 1258 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1259 /* add partial results together */ 1260 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1261 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1262 PetscFunctionReturn(PETSC_SUCCESS); 1263 } 1264 1265 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1266 { 1267 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1268 1269 PetscFunctionBegin; 1270 /* do nondiagonal part */ 1271 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1272 /* do local part */ 1273 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1274 /* add partial results together */ 1275 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1276 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1277 PetscFunctionReturn(PETSC_SUCCESS); 1278 } 1279 1280 /* 1281 This only works correctly for square matrices where the subblock A->A is the 1282 diagonal block 1283 */ 1284 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1285 { 1286 PetscFunctionBegin; 1287 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1288 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1289 PetscFunctionReturn(PETSC_SUCCESS); 1290 } 1291 1292 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1293 { 1294 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1295 1296 PetscFunctionBegin; 1297 PetscCall(MatScale(a->A, aa)); 1298 PetscCall(MatScale(a->B, aa)); 1299 PetscFunctionReturn(PETSC_SUCCESS); 1300 } 1301 1302 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1303 { 1304 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1305 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1306 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1307 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1308 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1309 1310 PetscFunctionBegin; 1311 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1312 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1313 mat->getrowactive = PETSC_TRUE; 1314 1315 if (!mat->rowvalues && (idx || v)) { 1316 /* 1317 allocate enough space to hold information from the longest row. 1318 */ 1319 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1320 PetscInt max = 1, mbs = mat->mbs, tmp; 1321 for (i = 0; i < mbs; i++) { 1322 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1323 if (max < tmp) max = tmp; 1324 } 1325 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1326 } 1327 lrow = row - brstart; 1328 1329 pvA = &vworkA; 1330 pcA = &cworkA; 1331 pvB = &vworkB; 1332 pcB = &cworkB; 1333 if (!v) { 1334 pvA = NULL; 1335 pvB = NULL; 1336 } 1337 if (!idx) { 1338 pcA = NULL; 1339 if (!v) pcB = NULL; 1340 } 1341 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1342 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1343 nztot = nzA + nzB; 1344 1345 cmap = mat->garray; 1346 if (v || idx) { 1347 if (nztot) { 1348 /* Sort by increasing column numbers, assuming A and B already sorted */ 1349 PetscInt imark = -1; 1350 if (v) { 1351 *v = v_p = mat->rowvalues; 1352 for (i = 0; i < nzB; i++) { 1353 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1354 else break; 1355 } 1356 imark = i; 1357 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1358 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1359 } 1360 if (idx) { 1361 *idx = idx_p = mat->rowindices; 1362 if (imark > -1) { 1363 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1364 } else { 1365 for (i = 0; i < nzB; i++) { 1366 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1367 else break; 1368 } 1369 imark = i; 1370 } 1371 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1372 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1373 } 1374 } else { 1375 if (idx) *idx = NULL; 1376 if (v) *v = NULL; 1377 } 1378 } 1379 *nz = nztot; 1380 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1381 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1382 PetscFunctionReturn(PETSC_SUCCESS); 1383 } 1384 1385 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1386 { 1387 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1388 1389 PetscFunctionBegin; 1390 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1391 baij->getrowactive = PETSC_FALSE; 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1396 { 1397 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1398 1399 PetscFunctionBegin; 1400 PetscCall(MatZeroEntries(l->A)); 1401 PetscCall(MatZeroEntries(l->B)); 1402 PetscFunctionReturn(PETSC_SUCCESS); 1403 } 1404 1405 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1406 { 1407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1408 Mat A = a->A, B = a->B; 1409 PetscLogDouble isend[5], irecv[5]; 1410 1411 PetscFunctionBegin; 1412 info->block_size = (PetscReal)matin->rmap->bs; 1413 1414 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1415 1416 isend[0] = info->nz_used; 1417 isend[1] = info->nz_allocated; 1418 isend[2] = info->nz_unneeded; 1419 isend[3] = info->memory; 1420 isend[4] = info->mallocs; 1421 1422 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1423 1424 isend[0] += info->nz_used; 1425 isend[1] += info->nz_allocated; 1426 isend[2] += info->nz_unneeded; 1427 isend[3] += info->memory; 1428 isend[4] += info->mallocs; 1429 1430 if (flag == MAT_LOCAL) { 1431 info->nz_used = isend[0]; 1432 info->nz_allocated = isend[1]; 1433 info->nz_unneeded = isend[2]; 1434 info->memory = isend[3]; 1435 info->mallocs = isend[4]; 1436 } else if (flag == MAT_GLOBAL_MAX) { 1437 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1438 1439 info->nz_used = irecv[0]; 1440 info->nz_allocated = irecv[1]; 1441 info->nz_unneeded = irecv[2]; 1442 info->memory = irecv[3]; 1443 info->mallocs = irecv[4]; 1444 } else if (flag == MAT_GLOBAL_SUM) { 1445 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1446 1447 info->nz_used = irecv[0]; 1448 info->nz_allocated = irecv[1]; 1449 info->nz_unneeded = irecv[2]; 1450 info->memory = irecv[3]; 1451 info->mallocs = irecv[4]; 1452 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1453 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1454 info->fill_ratio_needed = 0; 1455 info->factor_mallocs = 0; 1456 PetscFunctionReturn(PETSC_SUCCESS); 1457 } 1458 1459 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1460 { 1461 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1462 1463 PetscFunctionBegin; 1464 switch (op) { 1465 case MAT_NEW_NONZERO_LOCATIONS: 1466 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1467 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1468 case MAT_KEEP_NONZERO_PATTERN: 1469 case MAT_NEW_NONZERO_LOCATION_ERR: 1470 MatCheckPreallocated(A, 1); 1471 PetscCall(MatSetOption(a->A, op, flg)); 1472 PetscCall(MatSetOption(a->B, op, flg)); 1473 break; 1474 case MAT_ROW_ORIENTED: 1475 MatCheckPreallocated(A, 1); 1476 a->roworiented = flg; 1477 1478 PetscCall(MatSetOption(a->A, op, flg)); 1479 PetscCall(MatSetOption(a->B, op, flg)); 1480 break; 1481 case MAT_IGNORE_OFF_PROC_ENTRIES: 1482 a->donotstash = flg; 1483 break; 1484 case MAT_USE_HASH_TABLE: 1485 a->ht_flag = flg; 1486 a->ht_fact = 1.39; 1487 break; 1488 case MAT_SYMMETRIC: 1489 case MAT_STRUCTURALLY_SYMMETRIC: 1490 case MAT_HERMITIAN: 1491 case MAT_SYMMETRY_ETERNAL: 1492 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1493 case MAT_SPD_ETERNAL: 1494 /* if the diagonal matrix is square it inherits some of the properties above */ 1495 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1496 break; 1497 default: 1498 break; 1499 } 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1504 { 1505 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1506 Mat_SeqBAIJ *Aloc; 1507 Mat B; 1508 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1509 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1510 MatScalar *a; 1511 1512 PetscFunctionBegin; 1513 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1514 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1515 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1516 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1517 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1518 /* Do not know preallocation information, but must set block size */ 1519 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1520 } else { 1521 B = *matout; 1522 } 1523 1524 /* copy over the A part */ 1525 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1526 ai = Aloc->i; 1527 aj = Aloc->j; 1528 a = Aloc->a; 1529 PetscCall(PetscMalloc1(bs, &rvals)); 1530 1531 for (i = 0; i < mbs; i++) { 1532 rvals[0] = bs * (baij->rstartbs + i); 1533 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 col = (baij->cstartbs + aj[j]) * bs; 1536 for (k = 0; k < bs; k++) { 1537 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1538 1539 col++; 1540 a += bs; 1541 } 1542 } 1543 } 1544 /* copy over the B part */ 1545 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1546 ai = Aloc->i; 1547 aj = Aloc->j; 1548 a = Aloc->a; 1549 for (i = 0; i < mbs; i++) { 1550 rvals[0] = bs * (baij->rstartbs + i); 1551 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1552 for (j = ai[i]; j < ai[i + 1]; j++) { 1553 col = baij->garray[aj[j]] * bs; 1554 for (k = 0; k < bs; k++) { 1555 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1556 col++; 1557 a += bs; 1558 } 1559 } 1560 } 1561 PetscCall(PetscFree(rvals)); 1562 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1563 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1564 1565 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1566 else PetscCall(MatHeaderMerge(A, &B)); 1567 PetscFunctionReturn(PETSC_SUCCESS); 1568 } 1569 1570 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1571 { 1572 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1573 Mat a = baij->A, b = baij->B; 1574 PetscInt s1, s2, s3; 1575 1576 PetscFunctionBegin; 1577 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1578 if (rr) { 1579 PetscCall(VecGetLocalSize(rr, &s1)); 1580 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1581 /* Overlap communication with computation. */ 1582 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1583 } 1584 if (ll) { 1585 PetscCall(VecGetLocalSize(ll, &s1)); 1586 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1587 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1588 } 1589 /* scale the diagonal block */ 1590 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1591 1592 if (rr) { 1593 /* Do a scatter end and then right scale the off-diagonal block */ 1594 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1595 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1596 } 1597 PetscFunctionReturn(PETSC_SUCCESS); 1598 } 1599 1600 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1601 { 1602 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1603 PetscInt *lrows; 1604 PetscInt r, len; 1605 PetscBool cong; 1606 1607 PetscFunctionBegin; 1608 /* get locally owned rows */ 1609 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1610 /* fix right-hand side if needed */ 1611 if (x && b) { 1612 const PetscScalar *xx; 1613 PetscScalar *bb; 1614 1615 PetscCall(VecGetArrayRead(x, &xx)); 1616 PetscCall(VecGetArray(b, &bb)); 1617 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1618 PetscCall(VecRestoreArrayRead(x, &xx)); 1619 PetscCall(VecRestoreArray(b, &bb)); 1620 } 1621 1622 /* actually zap the local rows */ 1623 /* 1624 Zero the required rows. If the "diagonal block" of the matrix 1625 is square and the user wishes to set the diagonal we use separate 1626 code so that MatSetValues() is not called for each diagonal allocating 1627 new memory, thus calling lots of mallocs and slowing things down. 1628 1629 */ 1630 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1631 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1632 PetscCall(MatHasCongruentLayouts(A, &cong)); 1633 if ((diag != 0.0) && cong) { 1634 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1635 } else if (diag != 0.0) { 1636 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1637 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1638 for (r = 0; r < len; ++r) { 1639 const PetscInt row = lrows[r] + A->rmap->rstart; 1640 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1641 } 1642 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1643 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1644 } else { 1645 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1646 } 1647 PetscCall(PetscFree(lrows)); 1648 1649 /* only change matrix nonzero state if pattern was allowed to be changed */ 1650 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1651 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1652 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1653 } 1654 PetscFunctionReturn(PETSC_SUCCESS); 1655 } 1656 1657 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1658 { 1659 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1660 PetscMPIInt n, p = 0; 1661 PetscInt i, j, k, r, len = 0, row, col, count; 1662 PetscInt *lrows, *owners = A->rmap->range; 1663 PetscSFNode *rrows; 1664 PetscSF sf; 1665 const PetscScalar *xx; 1666 PetscScalar *bb, *mask; 1667 Vec xmask, lmask; 1668 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1669 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1670 PetscScalar *aa; 1671 1672 PetscFunctionBegin; 1673 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1674 /* Create SF where leaves are input rows and roots are owned rows */ 1675 PetscCall(PetscMalloc1(n, &lrows)); 1676 for (r = 0; r < n; ++r) lrows[r] = -1; 1677 PetscCall(PetscMalloc1(N, &rrows)); 1678 for (r = 0; r < N; ++r) { 1679 const PetscInt idx = rows[r]; 1680 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1681 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1682 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1683 } 1684 rrows[r].rank = p; 1685 rrows[r].index = rows[r] - owners[p]; 1686 } 1687 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1688 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1689 /* Collect flags for rows to be zeroed */ 1690 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1691 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1692 PetscCall(PetscSFDestroy(&sf)); 1693 /* Compress and put in row numbers */ 1694 for (r = 0; r < n; ++r) 1695 if (lrows[r] >= 0) lrows[len++] = r; 1696 /* zero diagonal part of matrix */ 1697 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1698 /* handle off-diagonal part of matrix */ 1699 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1700 PetscCall(VecDuplicate(l->lvec, &lmask)); 1701 PetscCall(VecGetArray(xmask, &bb)); 1702 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1703 PetscCall(VecRestoreArray(xmask, &bb)); 1704 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1705 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1706 PetscCall(VecDestroy(&xmask)); 1707 if (x) { 1708 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1709 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1710 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1711 PetscCall(VecGetArray(b, &bb)); 1712 } 1713 PetscCall(VecGetArray(lmask, &mask)); 1714 /* remove zeroed rows of off-diagonal matrix */ 1715 for (i = 0; i < len; ++i) { 1716 row = lrows[i]; 1717 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1718 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1719 for (k = 0; k < count; ++k) { 1720 aa[0] = 0.0; 1721 aa += bs; 1722 } 1723 } 1724 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1725 for (i = 0; i < l->B->rmap->N; ++i) { 1726 row = i / bs; 1727 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1728 for (k = 0; k < bs; ++k) { 1729 col = bs * baij->j[j] + k; 1730 if (PetscAbsScalar(mask[col])) { 1731 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1732 if (x) bb[i] -= aa[0] * xx[col]; 1733 aa[0] = 0.0; 1734 } 1735 } 1736 } 1737 } 1738 if (x) { 1739 PetscCall(VecRestoreArray(b, &bb)); 1740 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1741 } 1742 PetscCall(VecRestoreArray(lmask, &mask)); 1743 PetscCall(VecDestroy(&lmask)); 1744 PetscCall(PetscFree(lrows)); 1745 1746 /* only change matrix nonzero state if pattern was allowed to be changed */ 1747 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1748 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1749 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1750 } 1751 PetscFunctionReturn(PETSC_SUCCESS); 1752 } 1753 1754 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1755 { 1756 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1757 1758 PetscFunctionBegin; 1759 PetscCall(MatSetUnfactored(a->A)); 1760 PetscFunctionReturn(PETSC_SUCCESS); 1761 } 1762 1763 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1764 1765 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1766 { 1767 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1768 Mat a, b, c, d; 1769 PetscBool flg; 1770 1771 PetscFunctionBegin; 1772 a = matA->A; 1773 b = matA->B; 1774 c = matB->A; 1775 d = matB->B; 1776 1777 PetscCall(MatEqual(a, c, &flg)); 1778 if (flg) PetscCall(MatEqual(b, d, &flg)); 1779 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1780 PetscFunctionReturn(PETSC_SUCCESS); 1781 } 1782 1783 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1784 { 1785 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1786 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1787 1788 PetscFunctionBegin; 1789 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1790 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1791 PetscCall(MatCopy_Basic(A, B, str)); 1792 } else { 1793 PetscCall(MatCopy(a->A, b->A, str)); 1794 PetscCall(MatCopy(a->B, b->B, str)); 1795 } 1796 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1797 PetscFunctionReturn(PETSC_SUCCESS); 1798 } 1799 1800 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1801 { 1802 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1803 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1804 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1805 1806 PetscFunctionBegin; 1807 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1808 PetscFunctionReturn(PETSC_SUCCESS); 1809 } 1810 1811 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1812 { 1813 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1814 PetscBLASInt bnz, one = 1; 1815 Mat_SeqBAIJ *x, *y; 1816 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1817 1818 PetscFunctionBegin; 1819 if (str == SAME_NONZERO_PATTERN) { 1820 PetscScalar alpha = a; 1821 x = (Mat_SeqBAIJ *)xx->A->data; 1822 y = (Mat_SeqBAIJ *)yy->A->data; 1823 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1824 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1825 x = (Mat_SeqBAIJ *)xx->B->data; 1826 y = (Mat_SeqBAIJ *)yy->B->data; 1827 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1828 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1829 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1830 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1831 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1832 } else { 1833 Mat B; 1834 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1835 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1836 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1837 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1838 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1839 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1840 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1841 PetscCall(MatSetType(B, MATMPIBAIJ)); 1842 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1843 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1844 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1845 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1846 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1847 PetscCall(MatHeaderMerge(Y, &B)); 1848 PetscCall(PetscFree(nnz_d)); 1849 PetscCall(PetscFree(nnz_o)); 1850 } 1851 PetscFunctionReturn(PETSC_SUCCESS); 1852 } 1853 1854 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1855 { 1856 PetscFunctionBegin; 1857 if (PetscDefined(USE_COMPLEX)) { 1858 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1859 1860 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1861 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1862 } 1863 PetscFunctionReturn(PETSC_SUCCESS); 1864 } 1865 1866 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1867 { 1868 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1869 1870 PetscFunctionBegin; 1871 PetscCall(MatRealPart(a->A)); 1872 PetscCall(MatRealPart(a->B)); 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1877 { 1878 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1879 1880 PetscFunctionBegin; 1881 PetscCall(MatImaginaryPart(a->A)); 1882 PetscCall(MatImaginaryPart(a->B)); 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1887 { 1888 IS iscol_local; 1889 PetscInt csize; 1890 1891 PetscFunctionBegin; 1892 PetscCall(ISGetLocalSize(iscol, &csize)); 1893 if (call == MAT_REUSE_MATRIX) { 1894 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1895 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1896 } else { 1897 PetscCall(ISAllGather(iscol, &iscol_local)); 1898 } 1899 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1900 if (call == MAT_INITIAL_MATRIX) { 1901 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1902 PetscCall(ISDestroy(&iscol_local)); 1903 } 1904 PetscFunctionReturn(PETSC_SUCCESS); 1905 } 1906 1907 /* 1908 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1909 in local and then by concatenating the local matrices the end result. 1910 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1911 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1912 */ 1913 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1914 { 1915 PetscMPIInt rank, size; 1916 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1917 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1918 Mat M, Mreuse; 1919 MatScalar *vwork, *aa; 1920 MPI_Comm comm; 1921 IS isrow_new, iscol_new; 1922 Mat_SeqBAIJ *aij; 1923 1924 PetscFunctionBegin; 1925 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1926 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1927 PetscCallMPI(MPI_Comm_size(comm, &size)); 1928 /* The compression and expansion should be avoided. Doesn't point 1929 out errors, might change the indices, hence buggey */ 1930 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1931 if (isrow == iscol) { 1932 iscol_new = isrow_new; 1933 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1934 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1935 1936 if (call == MAT_REUSE_MATRIX) { 1937 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1938 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1939 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1940 } else { 1941 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1942 } 1943 PetscCall(ISDestroy(&isrow_new)); 1944 PetscCall(ISDestroy(&iscol_new)); 1945 /* 1946 m - number of local rows 1947 n - number of columns (same on all processors) 1948 rstart - first row in new global matrix generated 1949 */ 1950 PetscCall(MatGetBlockSize(mat, &bs)); 1951 PetscCall(MatGetSize(Mreuse, &m, &n)); 1952 m = m / bs; 1953 n = n / bs; 1954 1955 if (call == MAT_INITIAL_MATRIX) { 1956 aij = (Mat_SeqBAIJ *)Mreuse->data; 1957 ii = aij->i; 1958 jj = aij->j; 1959 1960 /* 1961 Determine the number of non-zeros in the diagonal and off-diagonal 1962 portions of the matrix in order to do correct preallocation 1963 */ 1964 1965 /* first get start and end of "diagonal" columns */ 1966 if (csize == PETSC_DECIDE) { 1967 PetscCall(ISGetSize(isrow, &mglobal)); 1968 if (mglobal == n * bs) { /* square matrix */ 1969 nlocal = m; 1970 } else { 1971 nlocal = n / size + ((n % size) > rank); 1972 } 1973 } else { 1974 nlocal = csize / bs; 1975 } 1976 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1977 rstart = rend - nlocal; 1978 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1979 1980 /* next, compute all the lengths */ 1981 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1982 for (i = 0; i < m; i++) { 1983 jend = ii[i + 1] - ii[i]; 1984 olen = 0; 1985 dlen = 0; 1986 for (j = 0; j < jend; j++) { 1987 if (*jj < rstart || *jj >= rend) olen++; 1988 else dlen++; 1989 jj++; 1990 } 1991 olens[i] = olen; 1992 dlens[i] = dlen; 1993 } 1994 PetscCall(MatCreate(comm, &M)); 1995 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1996 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 1997 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1998 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1999 PetscCall(PetscFree2(dlens, olens)); 2000 } else { 2001 PetscInt ml, nl; 2002 2003 M = *newmat; 2004 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2005 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2006 PetscCall(MatZeroEntries(M)); 2007 /* 2008 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2009 rather than the slower MatSetValues(). 2010 */ 2011 M->was_assembled = PETSC_TRUE; 2012 M->assembled = PETSC_FALSE; 2013 } 2014 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2015 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2016 aij = (Mat_SeqBAIJ *)Mreuse->data; 2017 ii = aij->i; 2018 jj = aij->j; 2019 aa = aij->a; 2020 for (i = 0; i < m; i++) { 2021 row = rstart / bs + i; 2022 nz = ii[i + 1] - ii[i]; 2023 cwork = jj; 2024 jj = PetscSafePointerPlusOffset(jj, nz); 2025 vwork = aa; 2026 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2027 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2028 } 2029 2030 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2031 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2032 *newmat = M; 2033 2034 /* save submatrix used in processor for next request */ 2035 if (call == MAT_INITIAL_MATRIX) { 2036 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2037 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2038 } 2039 PetscFunctionReturn(PETSC_SUCCESS); 2040 } 2041 2042 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2043 { 2044 MPI_Comm comm, pcomm; 2045 PetscInt clocal_size, nrows; 2046 const PetscInt *rows; 2047 PetscMPIInt size; 2048 IS crowp, lcolp; 2049 2050 PetscFunctionBegin; 2051 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2052 /* make a collective version of 'rowp' */ 2053 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2054 if (pcomm == comm) { 2055 crowp = rowp; 2056 } else { 2057 PetscCall(ISGetSize(rowp, &nrows)); 2058 PetscCall(ISGetIndices(rowp, &rows)); 2059 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2060 PetscCall(ISRestoreIndices(rowp, &rows)); 2061 } 2062 PetscCall(ISSetPermutation(crowp)); 2063 /* make a local version of 'colp' */ 2064 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2065 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2066 if (size == 1) { 2067 lcolp = colp; 2068 } else { 2069 PetscCall(ISAllGather(colp, &lcolp)); 2070 } 2071 PetscCall(ISSetPermutation(lcolp)); 2072 /* now we just get the submatrix */ 2073 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2074 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2075 /* clean up */ 2076 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2077 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2082 { 2083 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2084 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2085 2086 PetscFunctionBegin; 2087 if (nghosts) *nghosts = B->nbs; 2088 if (ghosts) *ghosts = baij->garray; 2089 PetscFunctionReturn(PETSC_SUCCESS); 2090 } 2091 2092 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2093 { 2094 Mat B; 2095 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2096 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2097 Mat_SeqAIJ *b; 2098 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2099 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2100 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2101 2102 PetscFunctionBegin; 2103 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2104 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2105 2106 /* Tell every processor the number of nonzeros per row */ 2107 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2108 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2109 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2110 displs = recvcounts + size; 2111 for (i = 0; i < size; i++) { 2112 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2113 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2114 } 2115 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2116 /* Create the sequential matrix of the same type as the local block diagonal */ 2117 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2118 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2119 PetscCall(MatSetType(B, MATSEQAIJ)); 2120 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2121 b = (Mat_SeqAIJ *)B->data; 2122 2123 /* Copy my part of matrix column indices over */ 2124 sendcount = ad->nz + bd->nz; 2125 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2126 a_jsendbuf = ad->j; 2127 b_jsendbuf = bd->j; 2128 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2129 cnt = 0; 2130 for (i = 0; i < n; i++) { 2131 /* put in lower diagonal portion */ 2132 m = bd->i[i + 1] - bd->i[i]; 2133 while (m > 0) { 2134 /* is it above diagonal (in bd (compressed) numbering) */ 2135 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2136 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2137 m--; 2138 } 2139 2140 /* put in diagonal portion */ 2141 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2142 2143 /* put in upper diagonal portion */ 2144 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2145 } 2146 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2147 2148 /* Gather all column indices to all processors */ 2149 for (i = 0; i < size; i++) { 2150 recvcounts[i] = 0; 2151 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2152 } 2153 displs[0] = 0; 2154 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2155 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2156 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2157 /* set the b->ilen (length of each row) values */ 2158 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2159 /* set the b->i indices */ 2160 b->i[0] = 0; 2161 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2162 PetscCall(PetscFree(lens)); 2163 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2164 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2165 PetscCall(PetscFree(recvcounts)); 2166 2167 PetscCall(MatPropagateSymmetryOptions(A, B)); 2168 *newmat = B; 2169 PetscFunctionReturn(PETSC_SUCCESS); 2170 } 2171 2172 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2173 { 2174 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2175 Vec bb1 = NULL; 2176 2177 PetscFunctionBegin; 2178 if (flag == SOR_APPLY_UPPER) { 2179 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2180 PetscFunctionReturn(PETSC_SUCCESS); 2181 } 2182 2183 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2184 2185 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2186 if (flag & SOR_ZERO_INITIAL_GUESS) { 2187 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2188 its--; 2189 } 2190 2191 while (its--) { 2192 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2193 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2194 2195 /* update rhs: bb1 = bb - B*x */ 2196 PetscCall(VecScale(mat->lvec, -1.0)); 2197 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2198 2199 /* local sweep */ 2200 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2201 } 2202 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2203 if (flag & SOR_ZERO_INITIAL_GUESS) { 2204 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2205 its--; 2206 } 2207 while (its--) { 2208 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2209 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2210 2211 /* update rhs: bb1 = bb - B*x */ 2212 PetscCall(VecScale(mat->lvec, -1.0)); 2213 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2214 2215 /* local sweep */ 2216 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2217 } 2218 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2219 if (flag & SOR_ZERO_INITIAL_GUESS) { 2220 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2221 its--; 2222 } 2223 while (its--) { 2224 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2225 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2226 2227 /* update rhs: bb1 = bb - B*x */ 2228 PetscCall(VecScale(mat->lvec, -1.0)); 2229 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2230 2231 /* local sweep */ 2232 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2233 } 2234 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2235 2236 PetscCall(VecDestroy(&bb1)); 2237 PetscFunctionReturn(PETSC_SUCCESS); 2238 } 2239 2240 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2241 { 2242 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2243 PetscInt m, N, i, *garray = aij->garray; 2244 PetscInt ib, jb, bs = A->rmap->bs; 2245 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2246 MatScalar *a_val = a_aij->a; 2247 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2248 MatScalar *b_val = b_aij->a; 2249 PetscReal *work; 2250 2251 PetscFunctionBegin; 2252 PetscCall(MatGetSize(A, &m, &N)); 2253 PetscCall(PetscCalloc1(N, &work)); 2254 if (type == NORM_2) { 2255 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2256 for (jb = 0; jb < bs; jb++) { 2257 for (ib = 0; ib < bs; ib++) { 2258 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2259 a_val++; 2260 } 2261 } 2262 } 2263 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2264 for (jb = 0; jb < bs; jb++) { 2265 for (ib = 0; ib < bs; ib++) { 2266 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2267 b_val++; 2268 } 2269 } 2270 } 2271 } else if (type == NORM_1) { 2272 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2273 for (jb = 0; jb < bs; jb++) { 2274 for (ib = 0; ib < bs; ib++) { 2275 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2276 a_val++; 2277 } 2278 } 2279 } 2280 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2281 for (jb = 0; jb < bs; jb++) { 2282 for (ib = 0; ib < bs; ib++) { 2283 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2284 b_val++; 2285 } 2286 } 2287 } 2288 } else if (type == NORM_INFINITY) { 2289 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2290 for (jb = 0; jb < bs; jb++) { 2291 for (ib = 0; ib < bs; ib++) { 2292 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2293 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2294 a_val++; 2295 } 2296 } 2297 } 2298 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2299 for (jb = 0; jb < bs; jb++) { 2300 for (ib = 0; ib < bs; ib++) { 2301 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2302 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2303 b_val++; 2304 } 2305 } 2306 } 2307 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2308 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2312 a_val++; 2313 } 2314 } 2315 } 2316 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2317 for (jb = 0; jb < bs; jb++) { 2318 for (ib = 0; ib < bs; ib++) { 2319 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2320 b_val++; 2321 } 2322 } 2323 } 2324 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2325 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2326 for (jb = 0; jb < bs; jb++) { 2327 for (ib = 0; ib < bs; ib++) { 2328 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2329 a_val++; 2330 } 2331 } 2332 } 2333 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2334 for (jb = 0; jb < bs; jb++) { 2335 for (ib = 0; ib < bs; ib++) { 2336 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2337 b_val++; 2338 } 2339 } 2340 } 2341 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2342 if (type == NORM_INFINITY) { 2343 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2344 } else { 2345 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2346 } 2347 PetscCall(PetscFree(work)); 2348 if (type == NORM_2) { 2349 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2350 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2351 for (i = 0; i < N; i++) reductions[i] /= m; 2352 } 2353 PetscFunctionReturn(PETSC_SUCCESS); 2354 } 2355 2356 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2357 { 2358 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2359 2360 PetscFunctionBegin; 2361 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2362 A->factorerrortype = a->A->factorerrortype; 2363 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2364 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2365 PetscFunctionReturn(PETSC_SUCCESS); 2366 } 2367 2368 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2369 { 2370 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2371 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2372 2373 PetscFunctionBegin; 2374 if (!Y->preallocated) { 2375 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2376 } else if (!aij->nz) { 2377 PetscInt nonew = aij->nonew; 2378 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2379 aij->nonew = nonew; 2380 } 2381 PetscCall(MatShift_Basic(Y, a)); 2382 PetscFunctionReturn(PETSC_SUCCESS); 2383 } 2384 2385 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2386 { 2387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2388 2389 PetscFunctionBegin; 2390 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2391 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2392 if (d) { 2393 PetscInt rstart; 2394 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2395 *d += rstart / A->rmap->bs; 2396 } 2397 PetscFunctionReturn(PETSC_SUCCESS); 2398 } 2399 2400 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2401 { 2402 PetscFunctionBegin; 2403 *a = ((Mat_MPIBAIJ *)A->data)->A; 2404 PetscFunctionReturn(PETSC_SUCCESS); 2405 } 2406 2407 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2408 { 2409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2410 2411 PetscFunctionBegin; 2412 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2413 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2414 PetscFunctionReturn(PETSC_SUCCESS); 2415 } 2416 2417 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2418 MatGetRow_MPIBAIJ, 2419 MatRestoreRow_MPIBAIJ, 2420 MatMult_MPIBAIJ, 2421 /* 4*/ MatMultAdd_MPIBAIJ, 2422 MatMultTranspose_MPIBAIJ, 2423 MatMultTransposeAdd_MPIBAIJ, 2424 NULL, 2425 NULL, 2426 NULL, 2427 /*10*/ NULL, 2428 NULL, 2429 NULL, 2430 MatSOR_MPIBAIJ, 2431 MatTranspose_MPIBAIJ, 2432 /*15*/ MatGetInfo_MPIBAIJ, 2433 MatEqual_MPIBAIJ, 2434 MatGetDiagonal_MPIBAIJ, 2435 MatDiagonalScale_MPIBAIJ, 2436 MatNorm_MPIBAIJ, 2437 /*20*/ MatAssemblyBegin_MPIBAIJ, 2438 MatAssemblyEnd_MPIBAIJ, 2439 MatSetOption_MPIBAIJ, 2440 MatZeroEntries_MPIBAIJ, 2441 /*24*/ MatZeroRows_MPIBAIJ, 2442 NULL, 2443 NULL, 2444 NULL, 2445 NULL, 2446 /*29*/ MatSetUp_MPI_Hash, 2447 NULL, 2448 NULL, 2449 MatGetDiagonalBlock_MPIBAIJ, 2450 NULL, 2451 /*34*/ MatDuplicate_MPIBAIJ, 2452 NULL, 2453 NULL, 2454 NULL, 2455 NULL, 2456 /*39*/ MatAXPY_MPIBAIJ, 2457 MatCreateSubMatrices_MPIBAIJ, 2458 MatIncreaseOverlap_MPIBAIJ, 2459 MatGetValues_MPIBAIJ, 2460 MatCopy_MPIBAIJ, 2461 /*44*/ NULL, 2462 MatScale_MPIBAIJ, 2463 MatShift_MPIBAIJ, 2464 NULL, 2465 MatZeroRowsColumns_MPIBAIJ, 2466 /*49*/ NULL, 2467 NULL, 2468 NULL, 2469 NULL, 2470 NULL, 2471 /*54*/ MatFDColoringCreate_MPIXAIJ, 2472 NULL, 2473 MatSetUnfactored_MPIBAIJ, 2474 MatPermute_MPIBAIJ, 2475 MatSetValuesBlocked_MPIBAIJ, 2476 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2477 MatDestroy_MPIBAIJ, 2478 MatView_MPIBAIJ, 2479 NULL, 2480 NULL, 2481 /*64*/ NULL, 2482 NULL, 2483 NULL, 2484 NULL, 2485 NULL, 2486 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2487 NULL, 2488 NULL, 2489 NULL, 2490 NULL, 2491 /*74*/ NULL, 2492 MatFDColoringApply_BAIJ, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*79*/ NULL, 2497 NULL, 2498 NULL, 2499 NULL, 2500 MatLoad_MPIBAIJ, 2501 /*84*/ NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 NULL, 2506 /*89*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 /*94*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*99*/ NULL, 2517 NULL, 2518 NULL, 2519 MatConjugate_MPIBAIJ, 2520 NULL, 2521 /*104*/ NULL, 2522 MatRealPart_MPIBAIJ, 2523 MatImaginaryPart_MPIBAIJ, 2524 NULL, 2525 NULL, 2526 /*109*/ NULL, 2527 NULL, 2528 NULL, 2529 NULL, 2530 MatMissingDiagonal_MPIBAIJ, 2531 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2532 NULL, 2533 MatGetGhosts_MPIBAIJ, 2534 NULL, 2535 NULL, 2536 /*119*/ NULL, 2537 NULL, 2538 NULL, 2539 NULL, 2540 MatGetMultiProcBlock_MPIBAIJ, 2541 /*124*/ NULL, 2542 MatGetColumnReductions_MPIBAIJ, 2543 MatInvertBlockDiagonal_MPIBAIJ, 2544 NULL, 2545 NULL, 2546 /*129*/ NULL, 2547 NULL, 2548 NULL, 2549 NULL, 2550 NULL, 2551 /*134*/ NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 /*139*/ MatSetBlockSizes_Default, 2557 NULL, 2558 NULL, 2559 MatFDColoringSetUp_MPIXAIJ, 2560 NULL, 2561 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2562 NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*150*/ NULL, 2568 MatEliminateZeros_MPIBAIJ, 2569 MatGetRowSumAbs_MPIBAIJ, 2570 NULL, 2571 NULL, 2572 /*155*/ NULL, 2573 MatCopyHashToXAIJ_MPI_Hash}; 2574 2575 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2576 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2577 2578 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2579 { 2580 PetscInt m, rstart, cstart, cend; 2581 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2582 const PetscInt *JJ = NULL; 2583 PetscScalar *values = NULL; 2584 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2585 PetscBool nooffprocentries; 2586 2587 PetscFunctionBegin; 2588 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2589 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2590 PetscCall(PetscLayoutSetUp(B->rmap)); 2591 PetscCall(PetscLayoutSetUp(B->cmap)); 2592 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2593 m = B->rmap->n / bs; 2594 rstart = B->rmap->rstart / bs; 2595 cstart = B->cmap->rstart / bs; 2596 cend = B->cmap->rend / bs; 2597 2598 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2599 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2600 for (i = 0; i < m; i++) { 2601 nz = ii[i + 1] - ii[i]; 2602 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2603 nz_max = PetscMax(nz_max, nz); 2604 dlen = 0; 2605 olen = 0; 2606 JJ = jj + ii[i]; 2607 for (j = 0; j < nz; j++) { 2608 if (*JJ < cstart || *JJ >= cend) olen++; 2609 else dlen++; 2610 JJ++; 2611 } 2612 d_nnz[i] = dlen; 2613 o_nnz[i] = olen; 2614 } 2615 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2616 PetscCall(PetscFree2(d_nnz, o_nnz)); 2617 2618 values = (PetscScalar *)V; 2619 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2620 for (i = 0; i < m; i++) { 2621 PetscInt row = i + rstart; 2622 PetscInt ncols = ii[i + 1] - ii[i]; 2623 const PetscInt *icols = jj + ii[i]; 2624 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2625 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2626 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2627 } else { /* block ordering does not match so we can only insert one block at a time. */ 2628 PetscInt j; 2629 for (j = 0; j < ncols; j++) { 2630 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2631 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2632 } 2633 } 2634 } 2635 2636 if (!V) PetscCall(PetscFree(values)); 2637 nooffprocentries = B->nooffprocentries; 2638 B->nooffprocentries = PETSC_TRUE; 2639 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2640 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2641 B->nooffprocentries = nooffprocentries; 2642 2643 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2644 PetscFunctionReturn(PETSC_SUCCESS); 2645 } 2646 2647 /*@C 2648 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2649 2650 Collective 2651 2652 Input Parameters: 2653 + B - the matrix 2654 . bs - the block size 2655 . i - the indices into `j` for the start of each local row (starts with zero) 2656 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2657 - v - optional values in the matrix, use `NULL` if not provided 2658 2659 Level: advanced 2660 2661 Notes: 2662 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2663 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2664 called this routine. 2665 2666 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2667 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2668 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2669 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2670 block column and the second index is over columns within a block. 2671 2672 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2673 2674 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2675 @*/ 2676 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2677 { 2678 PetscFunctionBegin; 2679 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2680 PetscValidType(B, 1); 2681 PetscValidLogicalCollectiveInt(B, bs, 2); 2682 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2683 PetscFunctionReturn(PETSC_SUCCESS); 2684 } 2685 2686 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2687 { 2688 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2689 PetscInt i; 2690 PetscMPIInt size; 2691 2692 PetscFunctionBegin; 2693 if (B->hash_active) { 2694 B->ops[0] = b->cops; 2695 B->hash_active = PETSC_FALSE; 2696 } 2697 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2698 PetscCall(MatSetBlockSize(B, bs)); 2699 PetscCall(PetscLayoutSetUp(B->rmap)); 2700 PetscCall(PetscLayoutSetUp(B->cmap)); 2701 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2702 2703 if (d_nnz) { 2704 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2705 } 2706 if (o_nnz) { 2707 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2708 } 2709 2710 b->bs2 = bs * bs; 2711 b->mbs = B->rmap->n / bs; 2712 b->nbs = B->cmap->n / bs; 2713 b->Mbs = B->rmap->N / bs; 2714 b->Nbs = B->cmap->N / bs; 2715 2716 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2717 b->rstartbs = B->rmap->rstart / bs; 2718 b->rendbs = B->rmap->rend / bs; 2719 b->cstartbs = B->cmap->rstart / bs; 2720 b->cendbs = B->cmap->rend / bs; 2721 2722 #if defined(PETSC_USE_CTABLE) 2723 PetscCall(PetscHMapIDestroy(&b->colmap)); 2724 #else 2725 PetscCall(PetscFree(b->colmap)); 2726 #endif 2727 PetscCall(PetscFree(b->garray)); 2728 PetscCall(VecDestroy(&b->lvec)); 2729 PetscCall(VecScatterDestroy(&b->Mvctx)); 2730 2731 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2732 2733 MatSeqXAIJGetOptions_Private(b->B); 2734 PetscCall(MatDestroy(&b->B)); 2735 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2736 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2737 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2738 MatSeqXAIJRestoreOptions_Private(b->B); 2739 2740 MatSeqXAIJGetOptions_Private(b->A); 2741 PetscCall(MatDestroy(&b->A)); 2742 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2743 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2744 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2745 MatSeqXAIJRestoreOptions_Private(b->A); 2746 2747 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2748 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2749 B->preallocated = PETSC_TRUE; 2750 B->was_assembled = PETSC_FALSE; 2751 B->assembled = PETSC_FALSE; 2752 PetscFunctionReturn(PETSC_SUCCESS); 2753 } 2754 2755 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2756 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2757 2758 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2759 { 2760 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2761 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2762 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2763 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2764 2765 PetscFunctionBegin; 2766 PetscCall(PetscMalloc1(M + 1, &ii)); 2767 ii[0] = 0; 2768 for (i = 0; i < M; i++) { 2769 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2770 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2771 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2772 /* remove one from count of matrix has diagonal */ 2773 for (j = id[i]; j < id[i + 1]; j++) { 2774 if (jd[j] == i) { 2775 ii[i + 1]--; 2776 break; 2777 } 2778 } 2779 } 2780 PetscCall(PetscMalloc1(ii[M], &jj)); 2781 cnt = 0; 2782 for (i = 0; i < M; i++) { 2783 for (j = io[i]; j < io[i + 1]; j++) { 2784 if (garray[jo[j]] > rstart) break; 2785 jj[cnt++] = garray[jo[j]]; 2786 } 2787 for (k = id[i]; k < id[i + 1]; k++) { 2788 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2789 } 2790 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2791 } 2792 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2793 PetscFunctionReturn(PETSC_SUCCESS); 2794 } 2795 2796 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2797 2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2799 2800 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2801 { 2802 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2803 Mat_MPIAIJ *b; 2804 Mat B; 2805 2806 PetscFunctionBegin; 2807 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2808 2809 if (reuse == MAT_REUSE_MATRIX) { 2810 B = *newmat; 2811 } else { 2812 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2813 PetscCall(MatSetType(B, MATMPIAIJ)); 2814 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2815 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2816 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2817 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2818 } 2819 b = (Mat_MPIAIJ *)B->data; 2820 2821 if (reuse == MAT_REUSE_MATRIX) { 2822 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2823 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2824 } else { 2825 PetscInt *garray = a->garray; 2826 Mat_SeqAIJ *bB; 2827 PetscInt bs, nnz; 2828 PetscCall(MatDestroy(&b->A)); 2829 PetscCall(MatDestroy(&b->B)); 2830 /* just clear out the data structure */ 2831 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2834 2835 /* Global numbering for b->B columns */ 2836 bB = (Mat_SeqAIJ *)b->B->data; 2837 bs = A->rmap->bs; 2838 nnz = bB->i[A->rmap->n]; 2839 for (PetscInt k = 0; k < nnz; k++) { 2840 PetscInt bj = bB->j[k] / bs; 2841 PetscInt br = bB->j[k] % bs; 2842 bB->j[k] = garray[bj] * bs + br; 2843 } 2844 } 2845 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2846 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2847 2848 if (reuse == MAT_INPLACE_MATRIX) { 2849 PetscCall(MatHeaderReplace(A, &B)); 2850 } else { 2851 *newmat = B; 2852 } 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 /*MC 2857 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2858 2859 Options Database Keys: 2860 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2861 . -mat_block_size <bs> - set the blocksize used to store the matrix 2862 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2863 - -mat_use_hash_table <fact> - set hash table factor 2864 2865 Level: beginner 2866 2867 Note: 2868 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2869 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2870 2871 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2872 M*/ 2873 2874 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2875 2876 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2877 { 2878 Mat_MPIBAIJ *b; 2879 PetscBool flg = PETSC_FALSE; 2880 2881 PetscFunctionBegin; 2882 PetscCall(PetscNew(&b)); 2883 B->data = (void *)b; 2884 B->ops[0] = MatOps_Values; 2885 B->assembled = PETSC_FALSE; 2886 2887 B->insertmode = NOT_SET_VALUES; 2888 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2889 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2890 2891 /* build local table of row and column ownerships */ 2892 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2893 2894 /* build cache for off array entries formed */ 2895 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2896 2897 b->donotstash = PETSC_FALSE; 2898 b->colmap = NULL; 2899 b->garray = NULL; 2900 b->roworiented = PETSC_TRUE; 2901 2902 /* stuff used in block assembly */ 2903 b->barray = NULL; 2904 2905 /* stuff used for matrix vector multiply */ 2906 b->lvec = NULL; 2907 b->Mvctx = NULL; 2908 2909 /* stuff for MatGetRow() */ 2910 b->rowindices = NULL; 2911 b->rowvalues = NULL; 2912 b->getrowactive = PETSC_FALSE; 2913 2914 /* hash table stuff */ 2915 b->ht = NULL; 2916 b->hd = NULL; 2917 b->ht_size = 0; 2918 b->ht_flag = PETSC_FALSE; 2919 b->ht_fact = 0; 2920 b->ht_total_ct = 0; 2921 b->ht_insert_ct = 0; 2922 2923 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2924 b->ijonly = PETSC_FALSE; 2925 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2929 #if defined(PETSC_HAVE_HYPRE) 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2931 #endif 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2939 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2940 2941 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2942 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2943 if (flg) { 2944 PetscReal fact = 1.39; 2945 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2946 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2947 if (fact <= 1.0) fact = 1.39; 2948 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2949 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2950 } 2951 PetscOptionsEnd(); 2952 PetscFunctionReturn(PETSC_SUCCESS); 2953 } 2954 2955 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2956 /*MC 2957 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2958 2959 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2960 and `MATMPIBAIJ` otherwise. 2961 2962 Options Database Keys: 2963 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2964 2965 Level: beginner 2966 2967 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2968 M*/ 2969 2970 /*@ 2971 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2972 (block compressed row). 2973 2974 Collective 2975 2976 Input Parameters: 2977 + B - the matrix 2978 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2979 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2980 . d_nz - number of block nonzeros per block row in diagonal portion of local 2981 submatrix (same for all local rows) 2982 . d_nnz - array containing the number of block nonzeros in the various block rows 2983 of the in diagonal portion of the local (possibly different for each block 2984 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2985 set it even if it is zero. 2986 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2987 submatrix (same for all local rows). 2988 - o_nnz - array containing the number of nonzeros in the various block rows of the 2989 off-diagonal portion of the local submatrix (possibly different for 2990 each block row) or `NULL`. 2991 2992 If the *_nnz parameter is given then the *_nz parameter is ignored 2993 2994 Options Database Keys: 2995 + -mat_block_size - size of the blocks to use 2996 - -mat_use_hash_table <fact> - set hash table factor 2997 2998 Level: intermediate 2999 3000 Notes: 3001 For good matrix assembly performance 3002 the user should preallocate the matrix storage by setting the parameters 3003 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3004 performance can be increased by more than a factor of 50. 3005 3006 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3007 than it must be used on all processors that share the object for that argument. 3008 3009 Storage Information: 3010 For a square global matrix we define each processor's diagonal portion 3011 to be its local rows and the corresponding columns (a square submatrix); 3012 each processor's off-diagonal portion encompasses the remainder of the 3013 local matrix (a rectangular submatrix). 3014 3015 The user can specify preallocated storage for the diagonal part of 3016 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3017 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3018 memory allocation. Likewise, specify preallocated storage for the 3019 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3020 3021 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3022 the figure below we depict these three local rows and all columns (0-11). 3023 3024 .vb 3025 0 1 2 3 4 5 6 7 8 9 10 11 3026 -------------------------- 3027 row 3 |o o o d d d o o o o o o 3028 row 4 |o o o d d d o o o o o o 3029 row 5 |o o o d d d o o o o o o 3030 -------------------------- 3031 .ve 3032 3033 Thus, any entries in the d locations are stored in the d (diagonal) 3034 submatrix, and any entries in the o locations are stored in the 3035 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3036 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3037 3038 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3039 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3040 In general, for PDE problems in which most nonzeros are near the diagonal, 3041 one expects `d_nz` >> `o_nz`. 3042 3043 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3044 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3045 You can also run with the option `-info` and look for messages with the string 3046 malloc in them to see if additional memory allocation was needed. 3047 3048 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3049 @*/ 3050 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3051 { 3052 PetscFunctionBegin; 3053 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3054 PetscValidType(B, 1); 3055 PetscValidLogicalCollectiveInt(B, bs, 2); 3056 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3057 PetscFunctionReturn(PETSC_SUCCESS); 3058 } 3059 3060 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3061 /*@ 3062 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3063 (block compressed row). 3064 3065 Collective 3066 3067 Input Parameters: 3068 + comm - MPI communicator 3069 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3070 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3071 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3072 This value should be the same as the local size used in creating the 3073 y vector for the matrix-vector product y = Ax. 3074 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3075 This value should be the same as the local size used in creating the 3076 x vector for the matrix-vector product y = Ax. 3077 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3078 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3079 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3080 submatrix (same for all local rows) 3081 . d_nnz - array containing the number of nonzero blocks in the various block rows 3082 of the in diagonal portion of the local (possibly different for each block 3083 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3084 and set it even if it is zero. 3085 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3086 submatrix (same for all local rows). 3087 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3088 off-diagonal portion of the local submatrix (possibly different for 3089 each block row) or NULL. 3090 3091 Output Parameter: 3092 . A - the matrix 3093 3094 Options Database Keys: 3095 + -mat_block_size - size of the blocks to use 3096 - -mat_use_hash_table <fact> - set hash table factor 3097 3098 Level: intermediate 3099 3100 Notes: 3101 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3102 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3103 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3104 3105 For good matrix assembly performance 3106 the user should preallocate the matrix storage by setting the parameters 3107 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3108 performance can be increased by more than a factor of 50. 3109 3110 If the *_nnz parameter is given then the *_nz parameter is ignored 3111 3112 A nonzero block is any block that as 1 or more nonzeros in it 3113 3114 The user MUST specify either the local or global matrix dimensions 3115 (possibly both). 3116 3117 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3118 than it must be used on all processors that share the object for that argument. 3119 3120 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3121 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3122 3123 Storage Information: 3124 For a square global matrix we define each processor's diagonal portion 3125 to be its local rows and the corresponding columns (a square submatrix); 3126 each processor's off-diagonal portion encompasses the remainder of the 3127 local matrix (a rectangular submatrix). 3128 3129 The user can specify preallocated storage for the diagonal part of 3130 the local submatrix with either d_nz or d_nnz (not both). Set 3131 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3132 memory allocation. Likewise, specify preallocated storage for the 3133 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3134 3135 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3136 the figure below we depict these three local rows and all columns (0-11). 3137 3138 .vb 3139 0 1 2 3 4 5 6 7 8 9 10 11 3140 -------------------------- 3141 row 3 |o o o d d d o o o o o o 3142 row 4 |o o o d d d o o o o o o 3143 row 5 |o o o d d d o o o o o o 3144 -------------------------- 3145 .ve 3146 3147 Thus, any entries in the d locations are stored in the d (diagonal) 3148 submatrix, and any entries in the o locations are stored in the 3149 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3150 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3151 3152 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3153 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3154 In general, for PDE problems in which most nonzeros are near the diagonal, 3155 one expects `d_nz` >> `o_nz`. 3156 3157 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3158 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3159 @*/ 3160 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3161 { 3162 PetscMPIInt size; 3163 3164 PetscFunctionBegin; 3165 PetscCall(MatCreate(comm, A)); 3166 PetscCall(MatSetSizes(*A, m, n, M, N)); 3167 PetscCallMPI(MPI_Comm_size(comm, &size)); 3168 if (size > 1) { 3169 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3170 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3171 } else { 3172 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3173 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3174 } 3175 PetscFunctionReturn(PETSC_SUCCESS); 3176 } 3177 3178 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3179 { 3180 Mat mat; 3181 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3182 PetscInt len = 0; 3183 3184 PetscFunctionBegin; 3185 *newmat = NULL; 3186 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3187 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3188 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3189 3190 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3191 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3192 if (matin->hash_active) { 3193 PetscCall(MatSetUp(mat)); 3194 } else { 3195 mat->factortype = matin->factortype; 3196 mat->preallocated = PETSC_TRUE; 3197 mat->assembled = PETSC_TRUE; 3198 mat->insertmode = NOT_SET_VALUES; 3199 3200 a = (Mat_MPIBAIJ *)mat->data; 3201 mat->rmap->bs = matin->rmap->bs; 3202 a->bs2 = oldmat->bs2; 3203 a->mbs = oldmat->mbs; 3204 a->nbs = oldmat->nbs; 3205 a->Mbs = oldmat->Mbs; 3206 a->Nbs = oldmat->Nbs; 3207 3208 a->size = oldmat->size; 3209 a->rank = oldmat->rank; 3210 a->donotstash = oldmat->donotstash; 3211 a->roworiented = oldmat->roworiented; 3212 a->rowindices = NULL; 3213 a->rowvalues = NULL; 3214 a->getrowactive = PETSC_FALSE; 3215 a->barray = NULL; 3216 a->rstartbs = oldmat->rstartbs; 3217 a->rendbs = oldmat->rendbs; 3218 a->cstartbs = oldmat->cstartbs; 3219 a->cendbs = oldmat->cendbs; 3220 3221 /* hash table stuff */ 3222 a->ht = NULL; 3223 a->hd = NULL; 3224 a->ht_size = 0; 3225 a->ht_flag = oldmat->ht_flag; 3226 a->ht_fact = oldmat->ht_fact; 3227 a->ht_total_ct = 0; 3228 a->ht_insert_ct = 0; 3229 3230 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3231 if (oldmat->colmap) { 3232 #if defined(PETSC_USE_CTABLE) 3233 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3234 #else 3235 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3236 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3237 #endif 3238 } else a->colmap = NULL; 3239 3240 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3241 PetscCall(PetscMalloc1(len, &a->garray)); 3242 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3243 } else a->garray = NULL; 3244 3245 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3246 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3247 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3248 3249 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3250 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3251 } 3252 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3253 *newmat = mat; 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3258 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3259 { 3260 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3261 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3262 PetscScalar *matvals; 3263 3264 PetscFunctionBegin; 3265 PetscCall(PetscViewerSetUp(viewer)); 3266 3267 /* read in matrix header */ 3268 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3269 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3270 M = header[1]; 3271 N = header[2]; 3272 nz = header[3]; 3273 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3274 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3275 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3276 3277 /* set block sizes from the viewer's .info file */ 3278 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3279 /* set local sizes if not set already */ 3280 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3281 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3282 /* set global sizes if not set already */ 3283 if (mat->rmap->N < 0) mat->rmap->N = M; 3284 if (mat->cmap->N < 0) mat->cmap->N = N; 3285 PetscCall(PetscLayoutSetUp(mat->rmap)); 3286 PetscCall(PetscLayoutSetUp(mat->cmap)); 3287 3288 /* check if the matrix sizes are correct */ 3289 PetscCall(MatGetSize(mat, &rows, &cols)); 3290 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3291 PetscCall(MatGetBlockSize(mat, &bs)); 3292 PetscCall(MatGetLocalSize(mat, &m, &n)); 3293 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3294 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3295 mbs = m / bs; 3296 nbs = n / bs; 3297 3298 /* read in row lengths and build row indices */ 3299 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3300 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3301 rowidxs[0] = 0; 3302 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3303 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3304 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3305 3306 /* read in column indices and matrix values */ 3307 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3308 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3309 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3310 3311 { /* preallocate matrix storage */ 3312 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3313 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3314 PetscBool sbaij, done; 3315 PetscInt *d_nnz, *o_nnz; 3316 3317 PetscCall(PetscBTCreate(nbs, &bt)); 3318 PetscCall(PetscHSetICreate(&ht)); 3319 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3320 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3321 for (i = 0; i < mbs; i++) { 3322 PetscCall(PetscBTMemzero(nbs, bt)); 3323 PetscCall(PetscHSetIClear(ht)); 3324 for (k = 0; k < bs; k++) { 3325 PetscInt row = bs * i + k; 3326 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3327 PetscInt col = colidxs[j]; 3328 if (!sbaij || col >= row) { 3329 if (col >= cs && col < ce) { 3330 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3331 } else { 3332 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3333 if (done) o_nnz[i]++; 3334 } 3335 } 3336 } 3337 } 3338 } 3339 PetscCall(PetscBTDestroy(&bt)); 3340 PetscCall(PetscHSetIDestroy(&ht)); 3341 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3342 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3343 PetscCall(PetscFree2(d_nnz, o_nnz)); 3344 } 3345 3346 /* store matrix values */ 3347 for (i = 0; i < m; i++) { 3348 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3349 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3350 } 3351 3352 PetscCall(PetscFree(rowidxs)); 3353 PetscCall(PetscFree2(colidxs, matvals)); 3354 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3355 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3356 PetscFunctionReturn(PETSC_SUCCESS); 3357 } 3358 3359 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3360 { 3361 PetscBool isbinary; 3362 3363 PetscFunctionBegin; 3364 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3365 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3366 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3367 PetscFunctionReturn(PETSC_SUCCESS); 3368 } 3369 3370 /*@ 3371 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3372 3373 Input Parameters: 3374 + mat - the matrix 3375 - fact - factor 3376 3377 Options Database Key: 3378 . -mat_use_hash_table <fact> - provide the factor 3379 3380 Level: advanced 3381 3382 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3383 @*/ 3384 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3385 { 3386 PetscFunctionBegin; 3387 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } 3390 3391 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3392 { 3393 Mat_MPIBAIJ *baij; 3394 3395 PetscFunctionBegin; 3396 baij = (Mat_MPIBAIJ *)mat->data; 3397 baij->ht_fact = fact; 3398 PetscFunctionReturn(PETSC_SUCCESS); 3399 } 3400 3401 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3402 { 3403 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3404 PetscBool flg; 3405 3406 PetscFunctionBegin; 3407 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3408 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3409 if (Ad) *Ad = a->A; 3410 if (Ao) *Ao = a->B; 3411 if (colmap) *colmap = a->garray; 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } 3414 3415 /* 3416 Special version for direct calls from Fortran (to eliminate two function call overheads 3417 */ 3418 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3419 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3420 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3421 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3422 #endif 3423 3424 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3425 /*@C 3426 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3427 3428 Collective 3429 3430 Input Parameters: 3431 + matin - the matrix 3432 . min - number of input rows 3433 . im - input rows 3434 . nin - number of input columns 3435 . in - input columns 3436 . v - numerical values input 3437 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3438 3439 Level: advanced 3440 3441 Developer Notes: 3442 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3443 3444 .seealso: `Mat`, `MatSetValuesBlocked()` 3445 @*/ 3446 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3447 { 3448 /* convert input arguments to C version */ 3449 Mat mat = *matin; 3450 PetscInt m = *min, n = *nin; 3451 InsertMode addv = *addvin; 3452 3453 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3454 const MatScalar *value; 3455 MatScalar *barray = baij->barray; 3456 PetscBool roworiented = baij->roworiented; 3457 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3458 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3459 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3460 3461 PetscFunctionBegin; 3462 /* tasks normally handled by MatSetValuesBlocked() */ 3463 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3464 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3465 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3466 if (mat->assembled) { 3467 mat->was_assembled = PETSC_TRUE; 3468 mat->assembled = PETSC_FALSE; 3469 } 3470 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3471 3472 if (!barray) { 3473 PetscCall(PetscMalloc1(bs2, &barray)); 3474 baij->barray = barray; 3475 } 3476 3477 if (roworiented) stepval = (n - 1) * bs; 3478 else stepval = (m - 1) * bs; 3479 3480 for (i = 0; i < m; i++) { 3481 if (im[i] < 0) continue; 3482 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3483 if (im[i] >= rstart && im[i] < rend) { 3484 row = im[i] - rstart; 3485 for (j = 0; j < n; j++) { 3486 /* If NumCol = 1 then a copy is not required */ 3487 if ((roworiented) && (n == 1)) { 3488 barray = (MatScalar *)v + i * bs2; 3489 } else if ((!roworiented) && (m == 1)) { 3490 barray = (MatScalar *)v + j * bs2; 3491 } else { /* Here a copy is required */ 3492 if (roworiented) { 3493 value = v + i * (stepval + bs) * bs + j * bs; 3494 } else { 3495 value = v + j * (stepval + bs) * bs + i * bs; 3496 } 3497 for (ii = 0; ii < bs; ii++, value += stepval) { 3498 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3499 } 3500 barray -= bs2; 3501 } 3502 3503 if (in[j] >= cstart && in[j] < cend) { 3504 col = in[j] - cstart; 3505 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3506 } else if (in[j] < 0) { 3507 continue; 3508 } else { 3509 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3510 if (mat->was_assembled) { 3511 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3512 3513 #if defined(PETSC_USE_DEBUG) 3514 #if defined(PETSC_USE_CTABLE) 3515 { 3516 PetscInt data; 3517 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3518 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3519 } 3520 #else 3521 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3522 #endif 3523 #endif 3524 #if defined(PETSC_USE_CTABLE) 3525 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3526 col = (col - 1) / bs; 3527 #else 3528 col = (baij->colmap[in[j]] - 1) / bs; 3529 #endif 3530 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3531 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3532 col = in[j]; 3533 } 3534 } else col = in[j]; 3535 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3536 } 3537 } 3538 } else { 3539 if (!baij->donotstash) { 3540 if (roworiented) { 3541 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3542 } else { 3543 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3544 } 3545 } 3546 } 3547 } 3548 3549 /* task normally handled by MatSetValuesBlocked() */ 3550 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3551 PetscFunctionReturn(PETSC_SUCCESS); 3552 } 3553 3554 /*@ 3555 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3556 3557 Collective 3558 3559 Input Parameters: 3560 + comm - MPI communicator 3561 . bs - the block size, only a block size of 1 is supported 3562 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3563 . n - This value should be the same as the local size used in creating the 3564 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3565 calculated if `N` is given) For square matrices `n` is almost always `m`. 3566 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3567 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3568 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3569 . j - column indices 3570 - a - matrix values 3571 3572 Output Parameter: 3573 . mat - the matrix 3574 3575 Level: intermediate 3576 3577 Notes: 3578 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3579 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3580 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3581 3582 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3583 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3584 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3585 with column-major ordering within blocks. 3586 3587 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3588 3589 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3590 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3591 @*/ 3592 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3593 { 3594 PetscFunctionBegin; 3595 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3596 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3597 PetscCall(MatCreate(comm, mat)); 3598 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3599 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3600 PetscCall(MatSetBlockSize(*mat, bs)); 3601 PetscCall(MatSetUp(*mat)); 3602 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3603 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3604 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3605 PetscFunctionReturn(PETSC_SUCCESS); 3606 } 3607 3608 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3609 { 3610 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3611 PetscInt *indx; 3612 PetscScalar *values; 3613 3614 PetscFunctionBegin; 3615 PetscCall(MatGetSize(inmat, &m, &N)); 3616 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3617 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3618 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3619 PetscInt *bindx, rmax = a->rmax, j; 3620 PetscMPIInt rank, size; 3621 3622 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3623 mbs = m / bs; 3624 Nbs = N / cbs; 3625 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3626 nbs = n / cbs; 3627 3628 PetscCall(PetscMalloc1(rmax, &bindx)); 3629 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3630 3631 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3632 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3633 if (rank == size - 1) { 3634 /* Check sum(nbs) = Nbs */ 3635 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3636 } 3637 3638 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3639 for (i = 0; i < mbs; i++) { 3640 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3641 nnz = nnz / bs; 3642 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3643 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3644 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3645 } 3646 PetscCall(PetscFree(bindx)); 3647 3648 PetscCall(MatCreate(comm, outmat)); 3649 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3650 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3651 PetscCall(MatSetType(*outmat, MATBAIJ)); 3652 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3653 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3654 MatPreallocateEnd(dnz, onz); 3655 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3656 } 3657 3658 /* numeric phase */ 3659 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3660 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3661 3662 for (i = 0; i < m; i++) { 3663 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3664 Ii = i + rstart; 3665 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3666 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3667 } 3668 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3669 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3670 PetscFunctionReturn(PETSC_SUCCESS); 3671 } 3672