#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
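/*
   Illustrative sketch (hypothetical values, not taken from any particular run):
   with bs = 2 and two off-diagonal block columns whose global block indices are
   garray[] = {3, 7}, the loop below records their local positions shifted by one,

     colmap[3] = 0*bs + 1 = 1;   colmap[7] = 1*bs + 1 = 3;

   so a later lookup of global block column 7 returns 3, from which the local
   scalar column offset is recovered as 3 - 1 = 2, while a zero entry
   distinguishes "not present" from block column 0.
*/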
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
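/*
   The two macros above share one insertion scheme: a bisection loop narrows the
   sorted block-column list of the target block row to a window of a few entries,
   a linear scan finishes the search, and only if the block column is absent (and
   new nonzeros are allowed) are the index and value arrays shifted to open a
   zeroed bs*bs slot. As a small worked example (hypothetical data): searching
   bcol = 5 in rp[] = {1, 3, 5, 8} with nrow = 4 bisects once to low = 2, the
   scan hits rp[2] == 5, and the scalar lands at offset bs2*2 + bs*cidx + ridx
   of that column-oriented block.
*/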
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
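/*
   A minimal caller-side sketch (names and indices hypothetical): the routine
   above dispatches each (im[i], in[j]) pair to the diagonal block A, the
   off-diagonal block B, or the stash for off-process rows, so user code simply
   passes global indices,

     PetscInt    rows[1] = {grow}, cols[2] = {gcol0, gcol1};
     PetscScalar vals[2] = {1.0, -1.0};
     PetscCall(MatSetValues(mat, 1, rows, 2, cols, vals, ADD_VALUES));
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));

   and the ownership tests and ghost-column translation through colmap/garray
   all happen here, not in user code.
*/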
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
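/*
   Worked example of the multiplicative hash above (values rounded for
   illustration): with size = 100 and key = 42,

     tmp  = 42 * 0.6180339887 = 25.957...
     HASH = (PetscInt)(100 * (25.957... - 25)) = 95

   i.e. the fractional part of key * HASH_KEY (the golden-ratio conjugate) is
   scaled to a slot index, which spreads consecutive keys roughly uniformly
   over the table.
*/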
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates the hash table, and sets the table.
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
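/*
   Collision handling below is plain open addressing with linear probing: a key
   row*Nbs + col + 1 (the +1 keeps 0 free to mean "empty") is hashed to a slot,
   and on collision the scan wraps forward modulo the table size until a free
   slot is found, storing the key in HT[] and the address of the corresponding
   bs*bs value block in HD[]. The lookups in the MatSetValues_*_HT routines
   above replay the same probe sequence.
*/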
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
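/*
   Assembly above drains the two stashes in the order the messages arrive:
   scalar entries are replayed through MatSetValues_MPIBAIJ() and blocked
   entries through MatSetValuesBlocked_MPIBAIJ() with the row-oriented flags
   temporarily forced off, since stashed blocks are stored column-oriented.
   Only after the stashes are empty are the sequential A and B parts assembled,
   with the ghost-column scatter (re)built on the first final assembly.
*/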
extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
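/*
   On disk the writer above produces the standard PETSc binary matrix layout:
   a four-entry header (MAT_FILE_CLASSID, M, N, total nonzeros), then per-row
   nonzero counts, then all column indices, then all values, with each rank
   contributing its rows in order. The per-block-row loops expand every bs x bs
   block into bs scalar rows and interleave the off-diagonal columns that fall
   left of the diagonal part before it, so indices stay sorted within each row.
*/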
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
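/*
   The matrix-vector products below all follow the same split: the scatter of
   ghost values of x into lvec is started, the owned (diagonal-block) product
   is computed while the messages are in flight, the scatter is completed, and
   the off-diagonal contribution from lvec is added, overlapping communication
   with computation. Schematically, on each rank,

     y_local = A_diag * x_owned + A_offdiag * x_ghost
*/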
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
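/*
   The merge in MatGetRow_MPIBAIJ above relies on a property of the storage:
   within the off-diagonal part B, ghost block columns whose global block index
   (via garray) lies below cstartbs sort before every diagonal-block column,
   and the rest sort after. So the returned row is assembled as the B entries
   left of the diagonal block, then all A entries, then the remaining B
   entries, yielding globally increasing column indices without an explicit
   sort.
*/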
static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
  case MAT_SUBMAT_SINGLEIS:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
PETSC_ERR_SUP, "unknown option %d", op); 1505 } 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1512 Mat_SeqBAIJ *Aloc; 1513 Mat B; 1514 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1515 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1516 MatScalar *a; 1517 1518 PetscFunctionBegin; 1519 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1520 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1521 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1522 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1523 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1524 /* Do not know preallocation information, but must set block size */ 1525 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1526 } else { 1527 B = *matout; 1528 } 1529 1530 /* copy over the A part */ 1531 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1532 ai = Aloc->i; 1533 aj = Aloc->j; 1534 a = Aloc->a; 1535 PetscCall(PetscMalloc1(bs, &rvals)); 1536 1537 for (i = 0; i < mbs; i++) { 1538 rvals[0] = bs * (baij->rstartbs + i); 1539 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1540 for (j = ai[i]; j < ai[i + 1]; j++) { 1541 col = (baij->cstartbs + aj[j]) * bs; 1542 for (k = 0; k < bs; k++) { 1543 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1544 1545 col++; 1546 a += bs; 1547 } 1548 } 1549 } 1550 /* copy over the B part */ 1551 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1552 ai = Aloc->i; 1553 aj = Aloc->j; 1554 a = Aloc->a; 1555 for (i = 0; i < mbs; i++) { 1556 rvals[0] = bs * (baij->rstartbs + i); 1557 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1558 for (j = ai[i]; j < ai[i + 1]; j++) { 1559 col = baij->garray[aj[j]] * bs; 1560 for (k = 0; k < bs; k++) { 1561 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1562 col++; 1563 a += bs; 1564 } 1565 } 1566 } 1567 PetscCall(PetscFree(rvals)); 1568 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1569 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1570 1571 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1572 else PetscCall(MatHeaderMerge(A, &B)); 1573 PetscFunctionReturn(PETSC_SUCCESS); 1574 } 1575 1576 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1577 { 1578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1579 Mat a = baij->A, b = baij->B; 1580 PetscInt s1, s2, s3; 1581 1582 PetscFunctionBegin; 1583 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1584 if (rr) { 1585 PetscCall(VecGetLocalSize(rr, &s1)); 1586 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1587 /* Overlap communication with computation. 
*/ 1588 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1589 } 1590 if (ll) { 1591 PetscCall(VecGetLocalSize(ll, &s1)); 1592 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1593 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1594 } 1595 /* scale the diagonal block */ 1596 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1597 1598 if (rr) { 1599 /* Do a scatter end and then right scale the off-diagonal block */ 1600 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1601 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1602 } 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1607 { 1608 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1609 PetscInt *lrows; 1610 PetscInt r, len; 1611 PetscBool cong; 1612 1613 PetscFunctionBegin; 1614 /* get locally owned rows */ 1615 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1616 /* fix right-hand side if needed */ 1617 if (x && b) { 1618 const PetscScalar *xx; 1619 PetscScalar *bb; 1620 1621 PetscCall(VecGetArrayRead(x, &xx)); 1622 PetscCall(VecGetArray(b, &bb)); 1623 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1624 PetscCall(VecRestoreArrayRead(x, &xx)); 1625 PetscCall(VecRestoreArray(b, &bb)); 1626 } 1627 1628 /* actually zap the local rows */ 1629 /* 1630 Zero the required rows. If the "diagonal block" of the matrix 1631 is square and the user wishes to set the diagonal, we use separate 1632 code so that MatSetValues() is not called for each diagonal entry; 1633 that would allocate new memory on every call, triggering many mallocs and slowing things down.
1634 1635 */ 1636 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1637 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1638 PetscCall(MatHasCongruentLayouts(A, &cong)); 1639 if ((diag != 0.0) && cong) { 1640 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1641 } else if (diag != 0.0) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1643 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1644 for (r = 0; r < len; ++r) { 1645 const PetscInt row = lrows[r] + A->rmap->rstart; 1646 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1647 } 1648 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1650 } else { 1651 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1652 } 1653 PetscCall(PetscFree(lrows)); 1654 1655 /* only change matrix nonzero state if pattern was allowed to be changed */ 1656 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1657 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1658 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1659 } 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1664 { 1665 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1666 PetscMPIInt n, p = 0; 1667 PetscInt i, j, k, r, len = 0, row, col, count; 1668 PetscInt *lrows, *owners = A->rmap->range; 1669 PetscSFNode *rrows; 1670 PetscSF sf; 1671 const PetscScalar *xx; 1672 PetscScalar *bb, *mask; 1673 Vec xmask, lmask; 1674 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1675 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1676 PetscScalar *aa; 1677 1678 PetscFunctionBegin; 1679 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1680 /* Create SF where leaves are input rows and roots are owned rows */ 1681 PetscCall(PetscMalloc1(n, &lrows)); 1682 for (r = 0; r < n; ++r) lrows[r] = -1; 1683 PetscCall(PetscMalloc1(N, &rrows)); 1684 for (r = 0; r < N; ++r) { 1685 const PetscInt idx = rows[r]; 1686 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1687 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1688 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1689 } 1690 rrows[r].rank = p; 1691 rrows[r].index = rows[r] - owners[p]; 1692 } 1693 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1694 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1695 /* Collect flags for rows to be zeroed */ 1696 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1697 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1698 PetscCall(PetscSFDestroy(&sf)); 1699 /* Compress and put in row numbers */ 1700 for (r = 0; r < n; ++r) 1701 if (lrows[r] >= 0) lrows[len++] = r; 1702 /* zero diagonal part of matrix */ 1703 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1704 /* handle off-diagonal part of matrix */ 1705 
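  /*
     Build a 0/1 mask over the ghosted columns: set 1 in xmask at each locally zeroed row and
     scatter it forward through l->Mvctx; the resulting lmask then marks which ghost columns of
     l->B correspond to rows (and hence columns) zeroed on other processes, so those columns can
     be eliminated from the off-diagonal block below.
  */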
PetscCall(MatCreateVecs(A, &xmask, NULL)); 1706 PetscCall(VecDuplicate(l->lvec, &lmask)); 1707 PetscCall(VecGetArray(xmask, &bb)); 1708 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1709 PetscCall(VecRestoreArray(xmask, &bb)); 1710 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1712 PetscCall(VecDestroy(&xmask)); 1713 if (x) { 1714 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1716 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1717 PetscCall(VecGetArray(b, &bb)); 1718 } 1719 PetscCall(VecGetArray(lmask, &mask)); 1720 /* remove zeroed rows of off-diagonal matrix */ 1721 for (i = 0; i < len; ++i) { 1722 row = lrows[i]; 1723 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1724 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1725 for (k = 0; k < count; ++k) { 1726 aa[0] = 0.0; 1727 aa += bs; 1728 } 1729 } 1730 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1731 for (i = 0; i < l->B->rmap->N; ++i) { 1732 row = i / bs; 1733 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1734 for (k = 0; k < bs; ++k) { 1735 col = bs * baij->j[j] + k; 1736 if (PetscAbsScalar(mask[col])) { 1737 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1738 if (x) bb[i] -= aa[0] * xx[col]; 1739 aa[0] = 0.0; 1740 } 1741 } 1742 } 1743 } 1744 if (x) { 1745 PetscCall(VecRestoreArray(b, &bb)); 1746 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1747 } 1748 PetscCall(VecRestoreArray(lmask, &mask)); 1749 PetscCall(VecDestroy(&lmask)); 1750 PetscCall(PetscFree(lrows)); 1751 1752 /* only change matrix nonzero state if pattern was allowed to be changed */ 1753 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1754 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1755 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1756 } 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1761 { 1762 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1763 1764 PetscFunctionBegin; 1765 PetscCall(MatSetUnfactored(a->A)); 1766 PetscFunctionReturn(PETSC_SUCCESS); 1767 } 1768 1769 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1770 1771 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1772 { 1773 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1774 Mat a, b, c, d; 1775 PetscBool flg; 1776 1777 PetscFunctionBegin; 1778 a = matA->A; 1779 b = matA->B; 1780 c = matB->A; 1781 d = matB->B; 1782 1783 PetscCall(MatEqual(a, c, &flg)); 1784 if (flg) PetscCall(MatEqual(b, d, &flg)); 1785 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1786 PetscFunctionReturn(PETSC_SUCCESS); 1787 } 1788 1789 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1790 { 1791 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1792 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1793 1794 PetscFunctionBegin; 1795 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1796 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1797 PetscCall(MatCopy_Basic(A, B, str)); 1798 } else { 1799 PetscCall(MatCopy(a->A, b->A, str)); 1800 PetscCall(MatCopy(a->B, b->B, str)); 1801 } 1802 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1803 PetscFunctionReturn(PETSC_SUCCESS); 1804 } 1805 1806 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1807 { 1808 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1809 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1810 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1811 1812 PetscFunctionBegin; 1813 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1814 PetscFunctionReturn(PETSC_SUCCESS); 1815 } 1816 1817 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1818 { 1819 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1820 PetscBLASInt bnz, one = 1; 1821 Mat_SeqBAIJ *x, *y; 1822 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1823 1824 PetscFunctionBegin; 1825 if (str == SAME_NONZERO_PATTERN) { 1826 PetscScalar alpha = a; 1827 x = (Mat_SeqBAIJ *)xx->A->data; 1828 y = (Mat_SeqBAIJ *)yy->A->data; 1829 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1830 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1831 x = (Mat_SeqBAIJ *)xx->B->data; 1832 y = (Mat_SeqBAIJ *)yy->B->data; 1833 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1834 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1835 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1836 } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */ 1837 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1838 } else { 1839 Mat B; 1840 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1841 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1842 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1843 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1844 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1845 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1846 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1847 PetscCall(MatSetType(B, MATMPIBAIJ)); 1848 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1849 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1850 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1851 /* MatAXPY_BasicWithPreallocation() for BAIJ matrices is much slower than for AIJ, even for bs=1!
*/ 1852 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1853 PetscCall(MatHeaderMerge(Y, &B)); 1854 PetscCall(PetscFree(nnz_d)); 1855 PetscCall(PetscFree(nnz_o)); 1856 } 1857 PetscFunctionReturn(PETSC_SUCCESS); 1858 } 1859 1860 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1861 { 1862 PetscFunctionBegin; 1863 if (PetscDefined(USE_COMPLEX)) { 1864 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1865 1866 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1867 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1868 } 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 1872 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1873 { 1874 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1875 1876 PetscFunctionBegin; 1877 PetscCall(MatRealPart(a->A)); 1878 PetscCall(MatRealPart(a->B)); 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1883 { 1884 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1885 1886 PetscFunctionBegin; 1887 PetscCall(MatImaginaryPart(a->A)); 1888 PetscCall(MatImaginaryPart(a->B)); 1889 PetscFunctionReturn(PETSC_SUCCESS); 1890 } 1891 1892 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1893 { 1894 IS iscol_local; 1895 PetscInt csize; 1896 1897 PetscFunctionBegin; 1898 PetscCall(ISGetLocalSize(iscol, &csize)); 1899 if (call == MAT_REUSE_MATRIX) { 1900 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1901 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1902 } else { 1903 PetscCall(ISAllGather(iscol, &iscol_local)); 1904 } 1905 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1906 if (call == MAT_INITIAL_MATRIX) { 1907 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1908 PetscCall(ISDestroy(&iscol_local)); 1909 } 1910 PetscFunctionReturn(PETSC_SUCCESS); 1911 } 1912 1913 /* 1914 Not great since it makes two copies of the submatrix: first a SeqBAIJ 1915 locally, and then the final result by concatenating the local matrices. 1916 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1917 This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency). 1918 */ 1919 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1920 { 1921 PetscMPIInt rank, size; 1922 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1923 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1924 Mat M, Mreuse; 1925 MatScalar *vwork, *aa; 1926 MPI_Comm comm; 1927 IS isrow_new, iscol_new; 1928 Mat_SeqBAIJ *aij; 1929 1930 PetscFunctionBegin; 1931 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1932 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1933 PetscCallMPI(MPI_Comm_size(comm, &size)); 1934 /* The compression and expansion should be avoided.
It doesn't point 1935 out errors, might change the indices, and is hence buggy. */ 1936 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1937 if (isrow == iscol) { 1938 iscol_new = isrow_new; 1939 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1940 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1941 1942 if (call == MAT_REUSE_MATRIX) { 1943 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1944 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1945 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1946 } else { 1947 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1948 } 1949 PetscCall(ISDestroy(&isrow_new)); 1950 PetscCall(ISDestroy(&iscol_new)); 1951 /* 1952 m - number of local rows 1953 n - number of columns (same on all processors) 1954 rstart - first row in new global matrix generated 1955 */ 1956 PetscCall(MatGetBlockSize(mat, &bs)); 1957 PetscCall(MatGetSize(Mreuse, &m, &n)); 1958 m = m / bs; 1959 n = n / bs; 1960 1961 if (call == MAT_INITIAL_MATRIX) { 1962 aij = (Mat_SeqBAIJ *)Mreuse->data; 1963 ii = aij->i; 1964 jj = aij->j; 1965 1966 /* 1967 Determine the number of non-zeros in the diagonal and off-diagonal 1968 portions of the matrix in order to do correct preallocation 1969 */ 1970 1971 /* first get start and end of "diagonal" columns */ 1972 if (csize == PETSC_DECIDE) { 1973 PetscCall(ISGetSize(isrow, &mglobal)); 1974 if (mglobal == n * bs) { /* square matrix */ 1975 nlocal = m; 1976 } else { 1977 nlocal = n / size + ((n % size) > rank); 1978 } 1979 } else { 1980 nlocal = csize / bs; 1981 } 1982 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1983 rstart = rend - nlocal; 1984 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1985 1986 /* next, compute all the lengths */ 1987 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1988 for (i = 0; i < m; i++) { 1989 jend = ii[i + 1] - ii[i]; 1990 olen = 0; 1991 dlen = 0; 1992 for (j = 0; j < jend; j++) { 1993 if (*jj < rstart || *jj >= rend) olen++; 1994 else dlen++; 1995 jj++; 1996 } 1997 olens[i] = olen; 1998 dlens[i] = dlen; 1999 } 2000 PetscCall(MatCreate(comm, &M)); 2001 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2002 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2003 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2004 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2005 PetscCall(PetscFree2(dlens, olens)); 2006 } else { 2007 PetscInt ml, nl; 2008 2009 M = *newmat; 2010 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2011 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2012 PetscCall(MatZeroEntries(M)); 2013 /* 2014 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2015 rather than the slower MatSetValues().
2016 */ 2017 M->was_assembled = PETSC_TRUE; 2018 M->assembled = PETSC_FALSE; 2019 } 2020 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2021 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2022 aij = (Mat_SeqBAIJ *)Mreuse->data; 2023 ii = aij->i; 2024 jj = aij->j; 2025 aa = aij->a; 2026 for (i = 0; i < m; i++) { 2027 row = rstart / bs + i; 2028 nz = ii[i + 1] - ii[i]; 2029 cwork = jj; 2030 jj = PetscSafePointerPlusOffset(jj, nz); 2031 vwork = aa; 2032 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2033 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2034 } 2035 2036 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2037 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2038 *newmat = M; 2039 2040 /* save submatrix used in processor for next request */ 2041 if (call == MAT_INITIAL_MATRIX) { 2042 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2043 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2044 } 2045 PetscFunctionReturn(PETSC_SUCCESS); 2046 } 2047 2048 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2049 { 2050 MPI_Comm comm, pcomm; 2051 PetscInt clocal_size, nrows; 2052 const PetscInt *rows; 2053 PetscMPIInt size; 2054 IS crowp, lcolp; 2055 2056 PetscFunctionBegin; 2057 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2058 /* make a collective version of 'rowp' */ 2059 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2060 if (pcomm == comm) { 2061 crowp = rowp; 2062 } else { 2063 PetscCall(ISGetSize(rowp, &nrows)); 2064 PetscCall(ISGetIndices(rowp, &rows)); 2065 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2066 PetscCall(ISRestoreIndices(rowp, &rows)); 2067 } 2068 PetscCall(ISSetPermutation(crowp)); 2069 /* make a local version of 'colp' */ 2070 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2071 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2072 if (size == 1) { 2073 lcolp = colp; 2074 } else { 2075 PetscCall(ISAllGather(colp, &lcolp)); 2076 } 2077 PetscCall(ISSetPermutation(lcolp)); 2078 /* now we just get the submatrix */ 2079 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2080 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2081 /* clean up */ 2082 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2083 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2084 PetscFunctionReturn(PETSC_SUCCESS); 2085 } 2086 2087 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2088 { 2089 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2090 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2091 2092 PetscFunctionBegin; 2093 if (nghosts) *nghosts = B->nbs; 2094 if (ghosts) *ghosts = baij->garray; 2095 PetscFunctionReturn(PETSC_SUCCESS); 2096 } 2097 2098 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2099 { 2100 Mat B; 2101 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2102 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2103 Mat_SeqAIJ *b; 2104 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2105 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2106 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2107 2108 PetscFunctionBegin; 2109 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2110 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2111 2112 /* Tell every 
processor the number of nonzeros per row */ 2113 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2114 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2115 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2116 displs = recvcounts + size; 2117 for (i = 0; i < size; i++) { 2118 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2119 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2120 } 2121 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2122 /* Create the sequential matrix of the same type as the local block diagonal */ 2123 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2124 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2125 PetscCall(MatSetType(B, MATSEQAIJ)); 2126 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2127 b = (Mat_SeqAIJ *)B->data; 2128 2129 /* Copy my part of matrix column indices over */ 2130 sendcount = ad->nz + bd->nz; 2131 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2132 a_jsendbuf = ad->j; 2133 b_jsendbuf = bd->j; 2134 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2135 cnt = 0; 2136 for (i = 0; i < n; i++) { 2137 /* put in lower diagonal portion */ 2138 m = bd->i[i + 1] - bd->i[i]; 2139 while (m > 0) { 2140 /* is it above diagonal (in bd (compressed) numbering) */ 2141 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2142 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2143 m--; 2144 } 2145 2146 /* put in diagonal portion */ 2147 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2148 2149 /* put in upper diagonal portion */ 2150 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2151 } 2152 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2153 2154 /* Gather all column indices to all processors */ 2155 for (i = 0; i < size; i++) { 2156 recvcounts[i] = 0; 2157 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2158 } 2159 displs[0] = 0; 2160 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2161 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2162 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2163 /* set the b->ilen (length of each row) values */ 2164 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2165 /* set the b->i indices */ 2166 b->i[0] = 0; 2167 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2168 PetscCall(PetscFree(lens)); 2169 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2170 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2171 PetscCall(PetscFree(recvcounts)); 2172 2173 PetscCall(MatPropagateSymmetryOptions(A, B)); 2174 *newmat = B; 2175 PetscFunctionReturn(PETSC_SUCCESS); 2176 } 2177 2178 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2179 { 2180 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2181 Vec bb1 = NULL; 2182 2183 PetscFunctionBegin; 2184 if (flag == SOR_APPLY_UPPER) { 2185 
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2186 PetscFunctionReturn(PETSC_SUCCESS); 2187 } 2188 2189 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2190 2191 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2192 if (flag & SOR_ZERO_INITIAL_GUESS) { 2193 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2194 its--; 2195 } 2196 2197 while (its--) { 2198 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2199 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2200 2201 /* update rhs: bb1 = bb - B*x */ 2202 PetscCall(VecScale(mat->lvec, -1.0)); 2203 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2204 2205 /* local sweep */ 2206 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2207 } 2208 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2209 if (flag & SOR_ZERO_INITIAL_GUESS) { 2210 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2211 its--; 2212 } 2213 while (its--) { 2214 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2215 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2216 2217 /* update rhs: bb1 = bb - B*x */ 2218 PetscCall(VecScale(mat->lvec, -1.0)); 2219 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2220 2221 /* local sweep */ 2222 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2223 } 2224 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2225 if (flag & SOR_ZERO_INITIAL_GUESS) { 2226 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2227 its--; 2228 } 2229 while (its--) { 2230 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2231 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2232 2233 /* update rhs: bb1 = bb - B*x */ 2234 PetscCall(VecScale(mat->lvec, -1.0)); 2235 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2236 2237 /* local sweep */ 2238 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2239 } 2240 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2241 2242 PetscCall(VecDestroy(&bb1)); 2243 PetscFunctionReturn(PETSC_SUCCESS); 2244 } 2245 2246 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2247 { 2248 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2249 PetscInt m, N, i, *garray = aij->garray; 2250 PetscInt ib, jb, bs = A->rmap->bs; 2251 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2252 MatScalar *a_val = a_aij->a; 2253 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2254 MatScalar *b_val = b_aij->a; 2255 PetscReal *work; 2256 2257 PetscFunctionBegin; 2258 PetscCall(MatGetSize(A, &m, &N)); 2259 PetscCall(PetscCalloc1(N, &work)); 2260 if (type == NORM_2) { 2261 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2262 for (jb = 0; jb < bs; jb++) { 2263 for (ib = 0; ib < bs; ib++) { 2264 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2265 a_val++; 2266 } 2267 } 2268 } 2269 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2270 for (jb = 0; jb < bs; jb++) { 2271 for (ib = 0; ib < bs; ib++) { 2272 work[garray[b_aij->j[i]] * bs + jb] += 
PetscAbsScalar(*b_val * *b_val); 2273 b_val++; 2274 } 2275 } 2276 } 2277 } else if (type == NORM_1) { 2278 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2279 for (jb = 0; jb < bs; jb++) { 2280 for (ib = 0; ib < bs; ib++) { 2281 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2282 a_val++; 2283 } 2284 } 2285 } 2286 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2287 for (jb = 0; jb < bs; jb++) { 2288 for (ib = 0; ib < bs; ib++) { 2289 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2290 b_val++; 2291 } 2292 } 2293 } 2294 } else if (type == NORM_INFINITY) { 2295 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2296 for (jb = 0; jb < bs; jb++) { 2297 for (ib = 0; ib < bs; ib++) { 2298 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2299 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2300 a_val++; 2301 } 2302 } 2303 } 2304 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2305 for (jb = 0; jb < bs; jb++) { 2306 for (ib = 0; ib < bs; ib++) { 2307 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2308 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2309 b_val++; 2310 } 2311 } 2312 } 2313 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2314 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2315 for (jb = 0; jb < bs; jb++) { 2316 for (ib = 0; ib < bs; ib++) { 2317 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2318 a_val++; 2319 } 2320 } 2321 } 2322 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2323 for (jb = 0; jb < bs; jb++) { 2324 for (ib = 0; ib < bs; ib++) { 2325 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2326 b_val++; 2327 } 2328 } 2329 } 2330 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2331 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2332 for (jb = 0; jb < bs; jb++) { 2333 for (ib = 0; ib < bs; ib++) { 2334 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2335 a_val++; 2336 } 2337 } 2338 } 2339 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2340 for (jb = 0; jb < bs; jb++) { 2341 for (ib = 0; ib < bs; ib++) { 2342 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2343 b_val++; 2344 } 2345 } 2346 } 2347 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2348 if (type == NORM_INFINITY) { 2349 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2350 } else { 2351 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2352 } 2353 PetscCall(PetscFree(work)); 2354 if (type == NORM_2) { 2355 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2356 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2357 for (i = 0; i < N; i++) reductions[i] /= m; 2358 } 2359 PetscFunctionReturn(PETSC_SUCCESS); 2360 } 2361 2362 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2363 { 2364 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2365 2366 PetscFunctionBegin; 2367 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2368 A->factorerrortype = a->A->factorerrortype; 2369 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2370 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2371 
PetscFunctionReturn(PETSC_SUCCESS); 2372 } 2373 2374 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2375 { 2376 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2377 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2378 2379 PetscFunctionBegin; 2380 if (!Y->preallocated) { 2381 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2382 } else if (!aij->nz) { 2383 PetscInt nonew = aij->nonew; 2384 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2385 aij->nonew = nonew; 2386 } 2387 PetscCall(MatShift_Basic(Y, a)); 2388 PetscFunctionReturn(PETSC_SUCCESS); 2389 } 2390 2391 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2392 { 2393 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2394 2395 PetscFunctionBegin; 2396 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2397 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2398 if (d) { 2399 PetscInt rstart; 2400 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2401 *d += rstart / A->rmap->bs; 2402 } 2403 PetscFunctionReturn(PETSC_SUCCESS); 2404 } 2405 2406 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2407 { 2408 PetscFunctionBegin; 2409 *a = ((Mat_MPIBAIJ *)A->data)->A; 2410 PetscFunctionReturn(PETSC_SUCCESS); 2411 } 2412 2413 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2414 { 2415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2416 2417 PetscFunctionBegin; 2418 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2419 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2420 PetscFunctionReturn(PETSC_SUCCESS); 2421 } 2422 2423 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2424 MatGetRow_MPIBAIJ, 2425 MatRestoreRow_MPIBAIJ, 2426 MatMult_MPIBAIJ, 2427 /* 4*/ MatMultAdd_MPIBAIJ, 2428 MatMultTranspose_MPIBAIJ, 2429 MatMultTransposeAdd_MPIBAIJ, 2430 NULL, 2431 NULL, 2432 NULL, 2433 /*10*/ NULL, 2434 NULL, 2435 NULL, 2436 MatSOR_MPIBAIJ, 2437 MatTranspose_MPIBAIJ, 2438 /*15*/ MatGetInfo_MPIBAIJ, 2439 MatEqual_MPIBAIJ, 2440 MatGetDiagonal_MPIBAIJ, 2441 MatDiagonalScale_MPIBAIJ, 2442 MatNorm_MPIBAIJ, 2443 /*20*/ MatAssemblyBegin_MPIBAIJ, 2444 MatAssemblyEnd_MPIBAIJ, 2445 MatSetOption_MPIBAIJ, 2446 MatZeroEntries_MPIBAIJ, 2447 /*24*/ MatZeroRows_MPIBAIJ, 2448 NULL, 2449 NULL, 2450 NULL, 2451 NULL, 2452 /*29*/ MatSetUp_MPI_Hash, 2453 NULL, 2454 NULL, 2455 MatGetDiagonalBlock_MPIBAIJ, 2456 NULL, 2457 /*34*/ MatDuplicate_MPIBAIJ, 2458 NULL, 2459 NULL, 2460 NULL, 2461 NULL, 2462 /*39*/ MatAXPY_MPIBAIJ, 2463 MatCreateSubMatrices_MPIBAIJ, 2464 MatIncreaseOverlap_MPIBAIJ, 2465 MatGetValues_MPIBAIJ, 2466 MatCopy_MPIBAIJ, 2467 /*44*/ NULL, 2468 MatScale_MPIBAIJ, 2469 MatShift_MPIBAIJ, 2470 NULL, 2471 MatZeroRowsColumns_MPIBAIJ, 2472 /*49*/ NULL, 2473 NULL, 2474 NULL, 2475 NULL, 2476 NULL, 2477 /*54*/ MatFDColoringCreate_MPIXAIJ, 2478 NULL, 2479 MatSetUnfactored_MPIBAIJ, 2480 MatPermute_MPIBAIJ, 2481 MatSetValuesBlocked_MPIBAIJ, 2482 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2483 MatDestroy_MPIBAIJ, 2484 MatView_MPIBAIJ, 2485 NULL, 2486 NULL, 2487 /*64*/ NULL, 2488 NULL, 2489 NULL, 2490 NULL, 2491 NULL, 2492 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2493 NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 /*74*/ NULL, 2498 MatFDColoringApply_BAIJ, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*79*/ NULL, 2503 NULL, 2504 NULL, 2505 NULL, 2506 MatLoad_MPIBAIJ, 2507 /*84*/ NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 NULL, 
2512 /*89*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 /*94*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*99*/ NULL, 2523 NULL, 2524 NULL, 2525 MatConjugate_MPIBAIJ, 2526 NULL, 2527 /*104*/ NULL, 2528 MatRealPart_MPIBAIJ, 2529 MatImaginaryPart_MPIBAIJ, 2530 NULL, 2531 NULL, 2532 /*109*/ NULL, 2533 NULL, 2534 NULL, 2535 NULL, 2536 MatMissingDiagonal_MPIBAIJ, 2537 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2538 NULL, 2539 MatGetGhosts_MPIBAIJ, 2540 NULL, 2541 NULL, 2542 /*119*/ NULL, 2543 NULL, 2544 NULL, 2545 NULL, 2546 MatGetMultiProcBlock_MPIBAIJ, 2547 /*124*/ NULL, 2548 MatGetColumnReductions_MPIBAIJ, 2549 MatInvertBlockDiagonal_MPIBAIJ, 2550 NULL, 2551 NULL, 2552 /*129*/ NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 NULL, 2557 /*134*/ NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 /*139*/ MatSetBlockSizes_Default, 2563 NULL, 2564 NULL, 2565 MatFDColoringSetUp_MPIXAIJ, 2566 NULL, 2567 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 NULL, 2573 /*150*/ NULL, 2574 MatEliminateZeros_MPIBAIJ, 2575 MatGetRowSumAbs_MPIBAIJ, 2576 NULL, 2577 NULL, 2578 /*155*/ NULL, 2579 MatCopyHashToXAIJ_MPI_Hash}; 2580 2581 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2582 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2583 2584 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2585 { 2586 PetscInt m, rstart, cstart, cend; 2587 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2588 const PetscInt *JJ = NULL; 2589 PetscScalar *values = NULL; 2590 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2591 PetscBool nooffprocentries; 2592 2593 PetscFunctionBegin; 2594 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2595 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2596 PetscCall(PetscLayoutSetUp(B->rmap)); 2597 PetscCall(PetscLayoutSetUp(B->cmap)); 2598 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2599 m = B->rmap->n / bs; 2600 rstart = B->rmap->rstart / bs; 2601 cstart = B->cmap->rstart / bs; 2602 cend = B->cmap->rend / bs; 2603 2604 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2605 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2606 for (i = 0; i < m; i++) { 2607 nz = ii[i + 1] - ii[i]; 2608 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2609 nz_max = PetscMax(nz_max, nz); 2610 dlen = 0; 2611 olen = 0; 2612 JJ = jj + ii[i]; 2613 for (j = 0; j < nz; j++) { 2614 if (*JJ < cstart || *JJ >= cend) olen++; 2615 else dlen++; 2616 JJ++; 2617 } 2618 d_nnz[i] = dlen; 2619 o_nnz[i] = olen; 2620 } 2621 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2622 PetscCall(PetscFree2(d_nnz, o_nnz)); 2623 2624 values = (PetscScalar *)V; 2625 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2626 for (i = 0; i < m; i++) { 2627 PetscInt row = i + rstart; 2628 PetscInt ncols = ii[i + 1] - ii[i]; 2629 const PetscInt *icols = jj + ii[i]; 2630 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2631 const PetscScalar *svals = values + (V ? 
(bs * bs * ii[i]) : 0); 2632 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2633 } else { /* block ordering does not match so we can only insert one block at a time. */ 2634 PetscInt j; 2635 for (j = 0; j < ncols; j++) { 2636 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2638 } 2639 } 2640 } 2641 2642 if (!V) PetscCall(PetscFree(values)); 2643 nooffprocentries = B->nooffprocentries; 2644 B->nooffprocentries = PETSC_TRUE; 2645 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2646 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2647 B->nooffprocentries = nooffprocentries; 2648 2649 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 /*@C 2654 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2655 2656 Collective 2657 2658 Input Parameters: 2659 + B - the matrix 2660 . bs - the block size 2661 . i - the indices into `j` for the start of each local row (starts with zero) 2662 . j - the column indices for each local row (starts with zero); these must be sorted within each row 2663 - v - optional values in the matrix, use `NULL` if not provided 2664 2665 Level: advanced 2666 2667 Notes: 2668 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2669 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2670 called this routine. 2671 2672 The order of the entries in `v` is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2673 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2674 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2675 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2676 block column and the second index is over columns within a block.
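   Example usage, a minimal sketch (the sizes and values here are illustrative assumptions, not taken from a PETSc example): two local block rows with bs = 2,
.vb
   PetscInt    bs  = 2;
   PetscInt    i[] = {0, 1, 3};    // block row 0 holds 1 block, block row 1 holds 2 blocks
   PetscInt    j[] = {0, 0, 1};    // block column indices, sorted within each row
   PetscScalar v[] = {1, 2, 3, 4,  // bs*bs = 4 values per block; ordering governed by MAT_ROW_ORIENTED
                      5, 6, 7, 8,
                      9, 10, 11, 12};
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, i, j, v));
.ve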
2677 2678 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well. 2679 2680 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2681 @*/ 2682 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2683 { 2684 PetscFunctionBegin; 2685 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2686 PetscValidType(B, 1); 2687 PetscValidLogicalCollectiveInt(B, bs, 2); 2688 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2689 PetscFunctionReturn(PETSC_SUCCESS); 2690 } 2691 2692 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2693 { 2694 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2695 PetscInt i; 2696 PetscMPIInt size; 2697 2698 PetscFunctionBegin; 2699 if (B->hash_active) { 2700 B->ops[0] = b->cops; 2701 B->hash_active = PETSC_FALSE; 2702 } 2703 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2704 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2705 PetscCall(PetscLayoutSetUp(B->rmap)); 2706 PetscCall(PetscLayoutSetUp(B->cmap)); 2707 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2708 2709 if (d_nnz) { 2710 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2711 } 2712 if (o_nnz) { 2713 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2714 } 2715 2716 b->bs2 = bs * bs; 2717 b->mbs = B->rmap->n / bs; 2718 b->nbs = B->cmap->n / bs; 2719 b->Mbs = B->rmap->N / bs; 2720 b->Nbs = B->cmap->N / bs; 2721 2722 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2723 b->rstartbs = B->rmap->rstart / bs; 2724 b->rendbs = B->rmap->rend / bs; 2725 b->cstartbs = B->cmap->rstart / bs; 2726 b->cendbs = B->cmap->rend / bs; 2727 2728 #if defined(PETSC_USE_CTABLE) 2729 PetscCall(PetscHMapIDestroy(&b->colmap)); 2730 #else 2731 PetscCall(PetscFree(b->colmap)); 2732 #endif 2733 PetscCall(PetscFree(b->garray)); 2734 PetscCall(VecDestroy(&b->lvec)); 2735 PetscCall(VecScatterDestroy(&b->Mvctx)); 2736 2737 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2738 2739 MatSeqXAIJGetOptions_Private(b->B); 2740 PetscCall(MatDestroy(&b->B)); 2741 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2742 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ?
B->cmap->N : 0)); 2743 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2744 MatSeqXAIJRestoreOptions_Private(b->B); 2745 2746 MatSeqXAIJGetOptions_Private(b->A); 2747 PetscCall(MatDestroy(&b->A)); 2748 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2749 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2750 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2751 MatSeqXAIJRestoreOptions_Private(b->A); 2752 2753 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2754 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2755 B->preallocated = PETSC_TRUE; 2756 B->was_assembled = PETSC_FALSE; 2757 B->assembled = PETSC_FALSE; 2758 PetscFunctionReturn(PETSC_SUCCESS); 2759 } 2760 2761 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2762 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2763 2764 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2765 { 2766 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2767 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2768 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2769 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2770 2771 PetscFunctionBegin; 2772 PetscCall(PetscMalloc1(M + 1, &ii)); 2773 ii[0] = 0; 2774 for (i = 0; i < M; i++) { 2775 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2776 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2777 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2778 /* remove one from count of matrix has diagonal */ 2779 for (j = id[i]; j < id[i + 1]; j++) { 2780 if (jd[j] == i) { 2781 ii[i + 1]--; 2782 break; 2783 } 2784 } 2785 } 2786 PetscCall(PetscMalloc1(ii[M], &jj)); 2787 cnt = 0; 2788 for (i = 0; i < M; i++) { 2789 for (j = io[i]; j < io[i + 1]; j++) { 2790 if (garray[jo[j]] > rstart) break; 2791 jj[cnt++] = garray[jo[j]]; 2792 } 2793 for (k = id[i]; k < id[i + 1]; k++) { 2794 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2795 } 2796 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2797 } 2798 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2799 PetscFunctionReturn(PETSC_SUCCESS); 2800 } 2801 2802 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2803 2804 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2805 2806 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2807 { 2808 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2809 Mat_MPIAIJ *b; 2810 Mat B; 2811 2812 PetscFunctionBegin; 2813 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2814 2815 if (reuse == MAT_REUSE_MATRIX) { 2816 B = *newmat; 2817 } else { 2818 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2819 PetscCall(MatSetType(B, MATMPIAIJ)); 2820 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2821 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2822 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2823 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2824 } 2825 b = (Mat_MPIAIJ *)B->data; 2826 2827 if (reuse == MAT_REUSE_MATRIX) { 2828 
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2829 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2830 } else { 2831 PetscInt *garray = a->garray; 2832 Mat_SeqAIJ *bB; 2833 PetscInt bs, nnz; 2834 PetscCall(MatDestroy(&b->A)); 2835 PetscCall(MatDestroy(&b->B)); 2836 /* just clear out the data structure */ 2837 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2838 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2839 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2840 2841 /* Global numbering for b->B columns */ 2842 bB = (Mat_SeqAIJ *)b->B->data; 2843 bs = A->rmap->bs; 2844 nnz = bB->i[A->rmap->n]; 2845 for (PetscInt k = 0; k < nnz; k++) { 2846 PetscInt bj = bB->j[k] / bs; 2847 PetscInt br = bB->j[k] % bs; 2848 bB->j[k] = garray[bj] * bs + br; 2849 } 2850 } 2851 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2852 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2853 2854 if (reuse == MAT_INPLACE_MATRIX) { 2855 PetscCall(MatHeaderReplace(A, &B)); 2856 } else { 2857 *newmat = B; 2858 } 2859 PetscFunctionReturn(PETSC_SUCCESS); 2860 } 2861 2862 /*MC 2863 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2864 2865 Options Database Keys: 2866 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2867 . -mat_block_size <bs> - set the blocksize used to store the matrix 2868 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2869 - -mat_use_hash_table <fact> - set hash table factor 2870 2871 Level: beginner 2872 2873 Note: 2874 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this case, no 2875 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored. 2876 2877 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2878 M*/ 2879 2880 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2881 2882 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2883 { 2884 Mat_MPIBAIJ *b; 2885 PetscBool flg = PETSC_FALSE; 2886 2887 PetscFunctionBegin; 2888 PetscCall(PetscNew(&b)); 2889 B->data = (void *)b; 2890 B->ops[0] = MatOps_Values; 2891 B->assembled = PETSC_FALSE; 2892 2893 B->insertmode = NOT_SET_VALUES; 2894 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2895 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2896 2897 /* build local table of row and column ownerships */ 2898 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2899 2900 /* build cache for off array entries formed */ 2901 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2902 2903 b->donotstash = PETSC_FALSE; 2904 b->colmap = NULL; 2905 b->garray = NULL; 2906 b->roworiented = PETSC_TRUE; 2907 2908 /* stuff used in block assembly */ 2909 b->barray = NULL; 2910 2911 /* stuff used for matrix vector multiply */ 2912 b->lvec = NULL; 2913 b->Mvctx = NULL; 2914 2915 /* stuff for MatGetRow() */ 2916 b->rowindices = NULL; 2917 b->rowvalues = NULL; 2918 b->getrowactive = PETSC_FALSE; 2919 2920 /* hash table stuff */ 2921 b->ht = NULL; 2922 b->hd = NULL; 2923 b->ht_size = 0; 2924 b->ht_flag = PETSC_FALSE; 2925 b->ht_fact = 0; 2926 b->ht_total_ct = 0; 2927 b->ht_insert_ct = 0; 2928 2929 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2930 b->ijonly = PETSC_FALSE; 2931 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2935 #if defined(PETSC_HAVE_HYPRE) 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2937 #endif 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2945 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2946 2947 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2948 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2949 if (flg) { 2950 PetscReal fact = 1.39; 2951 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2952 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2953 if (fact <= 1.0) fact = 1.39; 2954 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2955 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2956 } 2957 PetscOptionsEnd(); 2958 PetscFunctionReturn(PETSC_SUCCESS); 2959 } 2960 2961 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2962 /*MC 2963 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2964 2965 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2966 and `MATMPIBAIJ` otherwise. 2967 2968 Options Database Keys: 2969 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2970 2971 Level: beginner 2972 2973 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2974 M*/ 2975 2976 /*@ 2977 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2978 (block compressed row). 2979 2980 Collective 2981 2982 Input Parameters: 2983 + B - the matrix 2984 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2985 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2986 . d_nz - number of block nonzeros per block row in diagonal portion of local 2987 submatrix (same for all local rows) 2988 . d_nnz - array containing the number of block nonzeros in the various block rows 2989 of the in diagonal portion of the local (possibly different for each block 2990 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2991 set it even if it is zero. 2992 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2993 submatrix (same for all local rows). 2994 - o_nnz - array containing the number of nonzeros in the various block rows of the 2995 off-diagonal portion of the local submatrix (possibly different for 2996 each block row) or `NULL`. 2997 2998 If the *_nnz parameter is given then the *_nz parameter is ignored 2999 3000 Options Database Keys: 3001 + -mat_block_size - size of the blocks to use 3002 - -mat_use_hash_table <fact> - set hash table factor 3003 3004 Level: intermediate 3005 3006 Notes: 3007 For good matrix assembly performance 3008 the user should preallocate the matrix storage by setting the parameters 3009 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3010 performance can be increased by more than a factor of 50. 3011 3012 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3013 than it must be used on all processors that share the object for that argument. 3014 3015 Storage Information: 3016 For a square global matrix we define each processor's diagonal portion 3017 to be its local rows and the corresponding columns (a square submatrix); 3018 each processor's off-diagonal portion encompasses the remainder of the 3019 local matrix (a rectangular submatrix). 3020 3021 The user can specify preallocated storage for the diagonal part of 3022 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3023 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3024 memory allocation. 

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@
  MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
  (block compressed row).

  Collective

  Input Parameters:
+ comm  - MPI communicator
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
. d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
. o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  If the *_nnz parameter is given then the *_nz parameter is ignored.

  A nonzero block is any block that has 1 or more nonzeros in it.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d and the o submatrices are
  stored simply in the `MATSEQBAIJ` format for compressed row storage.

  Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
  and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
  In general, for PDE problems in which most nonzeros are near the diagonal,
  one expects `d_nz` >> `o_nz`.
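
  Example Usage:
  A rough sketch matching the figure above, assuming `bs` = 1 and four processes each owning three rows
  of a 12 x 12 matrix; each block row then has 3 diagonal-portion blocks and up to 9 off-diagonal blocks:

.vb
  Mat A;
  MatCreateBAIJ(comm, 1, 3, 3, 12, 12, 3, NULL, 9, NULL, &A);
.ve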

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
          `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
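
  /* Count the distinct block columns touched by each local block row so the preallocation below is
     exact: block columns that fall in this process's diagonal portion [cs,ce) are tracked with a bit
     set of local width nbs, while off-diagonal block columns (arbitrary global indices) go into a
     hash set. For MATMPISBAIJ only the upper triangle (col >= row) is counted, matching what that
     format stores. */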
  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

  Input Parameters:
+ mat  - the matrix
- fact - factor

  Options Database Key:
. -mat_use_hash_table <fact> - provide the factor

  Level: advanced
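
  Example Usage:
  A minimal sketch of enabling hash-table-based assembly and then setting the factor (the value 1.6 is
  purely illustrative, not a recommendation):

.vb
  MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE);
  MatMPIBAIJSetHashTableFactor(mat, 1.6);
.ve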

.seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

// PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

  Collective

  Input Parameters:
+ matin  - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
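            /* if the colmap lookup did not find in[j], this block column is not yet present in the
               off-diagonal part; unless new nonzeros are forbidden, disassemble the matrix back to
               global column numbering so the entry can be inserted (it is re-localized on assembly) */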
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given).
         This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax $. For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in block row `row-1` of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
  with column-major ordering within blocks.

  The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` (and `a`) arrays.
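
  Example Usage:
  An illustrative sketch on a single process with `bs` = 1: the 2 x 4 matrix

.vb
   1 0 0 2
   0 3 4 0
.ve

  would be created with

.vb
  PetscInt    i[] = {0, 2, 4};    // row 0 holds entries [0,2), row 1 holds entries [2,4)
  PetscInt    j[] = {0, 3, 1, 2}; // global column indices
  PetscScalar a[] = {1, 2, 3, 4};
  Mat         mat;

  MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 4, 2, 4, i, j, a, &mat);
.ve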

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
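
/*
  Stacks the processes' sequential BAIJ matrices into one parallel matrix: with MAT_INITIAL_MATRIX a
  symbolic phase derives the block sparsity pattern of each local block row and preallocates the
  output, after which the numeric phase copies the values row by row.
*/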
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
    PetscInt    *dnz, *onz, mbs, Nbs, nbs;
    PetscInt    *bindx, rmax = a->rmax, j;
    PetscMPIInt  rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}