#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
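      /* tie-break: on equal magnitudes keep the smaller global column index */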
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns in the matrix.
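   (When PETSC_USE_CTABLE is defined a hash map sized by the number of
   local off-diagonal block columns is used instead of the full-length array.)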
142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig   = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig   = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
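          /* column-oriented input: the values of global row im[i] are strided by m within v */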
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
   by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* multiplicative hashing: HASH_KEY is the fractional part of the golden ratio; the slot is the table-size-scaled fractional part of key*HASH_KEY */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig   = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates and fills the hash table. The table is created only once;
   if new entries need to be added to the matrix, the hash table has to
   be destroyed and recreated.
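   Collisions are resolved by linear probing: starting at the hashed
   slot, the key (the global block location) and a pointer to the block
   of values are stored in the first free slot found.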
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, all_assembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble.
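     Disassembly converts the off-diagonal block B back to global column
     numbering so that new off-process entries can be inserted.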
  */
  /*
     if the nonzero structure of the submatrix B cannot change, then we
     know that no process disassembled and we can skip this step
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                  mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
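      /* baij->Mvctx is the scatter that gathers the off-process entries of x needed by the off-diagonal block in MatMult() */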
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
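  /* scaling is purely local: the diagonal (A) and off-diagonal (B) blocks together hold all locally owned entries */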
PetscFunctionReturn(PETSC_SUCCESS); 1298 } 1299 1300 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1301 { 1302 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1303 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1304 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1305 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1306 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1307 1308 PetscFunctionBegin; 1309 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1310 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1311 mat->getrowactive = PETSC_TRUE; 1312 1313 if (!mat->rowvalues && (idx || v)) { 1314 /* 1315 allocate enough space to hold information from the longest row. 1316 */ 1317 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1318 PetscInt max = 1, mbs = mat->mbs, tmp; 1319 for (i = 0; i < mbs; i++) { 1320 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1321 if (max < tmp) max = tmp; 1322 } 1323 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1324 } 1325 lrow = row - brstart; 1326 1327 pvA = &vworkA; 1328 pcA = &cworkA; 1329 pvB = &vworkB; 1330 pcB = &cworkB; 1331 if (!v) { 1332 pvA = NULL; 1333 pvB = NULL; 1334 } 1335 if (!idx) { 1336 pcA = NULL; 1337 if (!v) pcB = NULL; 1338 } 1339 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1340 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1341 nztot = nzA + nzB; 1342 1343 cmap = mat->garray; 1344 if (v || idx) { 1345 if (nztot) { 1346 /* Sort by increasing column numbers, assuming A and B already sorted */ 1347 PetscInt imark = -1; 1348 if (v) { 1349 *v = v_p = mat->rowvalues; 1350 for (i = 0; i < nzB; i++) { 1351 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1352 else break; 1353 } 1354 imark = i; 1355 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1356 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1357 } 1358 if (idx) { 1359 *idx = idx_p = mat->rowindices; 1360 if (imark > -1) { 1361 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1362 } else { 1363 for (i = 0; i < nzB; i++) { 1364 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1365 else break; 1366 } 1367 imark = i; 1368 } 1369 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1370 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1371 } 1372 } else { 1373 if (idx) *idx = NULL; 1374 if (v) *v = NULL; 1375 } 1376 } 1377 *nz = nztot; 1378 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1379 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1384 { 1385 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1386 1387 PetscFunctionBegin; 1388 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1389 baij->getrowactive = PETSC_FALSE; 1390 PetscFunctionReturn(PETSC_SUCCESS); 1391 } 1392 1393 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1394 { 1395 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1396 1397 PetscFunctionBegin; 1398 PetscCall(MatZeroEntries(l->A)); 
1399 PetscCall(MatZeroEntries(l->B)); 1400 PetscFunctionReturn(PETSC_SUCCESS); 1401 } 1402 1403 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1404 { 1405 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1406 Mat A = a->A, B = a->B; 1407 PetscLogDouble isend[5], irecv[5]; 1408 1409 PetscFunctionBegin; 1410 info->block_size = (PetscReal)matin->rmap->bs; 1411 1412 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1413 1414 isend[0] = info->nz_used; 1415 isend[1] = info->nz_allocated; 1416 isend[2] = info->nz_unneeded; 1417 isend[3] = info->memory; 1418 isend[4] = info->mallocs; 1419 1420 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1421 1422 isend[0] += info->nz_used; 1423 isend[1] += info->nz_allocated; 1424 isend[2] += info->nz_unneeded; 1425 isend[3] += info->memory; 1426 isend[4] += info->mallocs; 1427 1428 if (flag == MAT_LOCAL) { 1429 info->nz_used = isend[0]; 1430 info->nz_allocated = isend[1]; 1431 info->nz_unneeded = isend[2]; 1432 info->memory = isend[3]; 1433 info->mallocs = isend[4]; 1434 } else if (flag == MAT_GLOBAL_MAX) { 1435 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1436 1437 info->nz_used = irecv[0]; 1438 info->nz_allocated = irecv[1]; 1439 info->nz_unneeded = irecv[2]; 1440 info->memory = irecv[3]; 1441 info->mallocs = irecv[4]; 1442 } else if (flag == MAT_GLOBAL_SUM) { 1443 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1444 1445 info->nz_used = irecv[0]; 1446 info->nz_allocated = irecv[1]; 1447 info->nz_unneeded = irecv[2]; 1448 info->memory = irecv[3]; 1449 info->mallocs = irecv[4]; 1450 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1451 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1452 info->fill_ratio_needed = 0; 1453 info->factor_mallocs = 0; 1454 PetscFunctionReturn(PETSC_SUCCESS); 1455 } 1456 1457 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1458 { 1459 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1460 1461 PetscFunctionBegin; 1462 switch (op) { 1463 case MAT_NEW_NONZERO_LOCATIONS: 1464 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1465 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1466 case MAT_KEEP_NONZERO_PATTERN: 1467 case MAT_NEW_NONZERO_LOCATION_ERR: 1468 MatCheckPreallocated(A, 1); 1469 PetscCall(MatSetOption(a->A, op, flg)); 1470 PetscCall(MatSetOption(a->B, op, flg)); 1471 break; 1472 case MAT_ROW_ORIENTED: 1473 MatCheckPreallocated(A, 1); 1474 a->roworiented = flg; 1475 1476 PetscCall(MatSetOption(a->A, op, flg)); 1477 PetscCall(MatSetOption(a->B, op, flg)); 1478 break; 1479 case MAT_IGNORE_OFF_PROC_ENTRIES: 1480 a->donotstash = flg; 1481 break; 1482 case MAT_USE_HASH_TABLE: 1483 a->ht_flag = flg; 1484 a->ht_fact = 1.39; 1485 break; 1486 case MAT_SYMMETRIC: 1487 case MAT_STRUCTURALLY_SYMMETRIC: 1488 case MAT_HERMITIAN: 1489 case MAT_SYMMETRY_ETERNAL: 1490 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1491 case MAT_SPD_ETERNAL: 1492 /* if the diagonal matrix is square it inherits some of the properties above */ 1493 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1494 break; 1495 default: 1496 break; 1497 } 1498 PetscFunctionReturn(PETSC_SUCCESS); 1499 } 1500 1501 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1502 { 1503 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1504 Mat_SeqBAIJ *Aloc; 1505 Mat B; 1506 PetscInt M = A->rmap->N, N = 
A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1507 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1508 MatScalar *a; 1509 1510 PetscFunctionBegin; 1511 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1512 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1513 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1514 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1515 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1516 /* Do not know preallocation information, but must set block size */ 1517 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1518 } else { 1519 B = *matout; 1520 } 1521 1522 /* copy over the A part */ 1523 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1524 ai = Aloc->i; 1525 aj = Aloc->j; 1526 a = Aloc->a; 1527 PetscCall(PetscMalloc1(bs, &rvals)); 1528 1529 for (i = 0; i < mbs; i++) { 1530 rvals[0] = bs * (baij->rstartbs + i); 1531 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1532 for (j = ai[i]; j < ai[i + 1]; j++) { 1533 col = (baij->cstartbs + aj[j]) * bs; 1534 for (k = 0; k < bs; k++) { 1535 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1536 1537 col++; 1538 a += bs; 1539 } 1540 } 1541 } 1542 /* copy over the B part */ 1543 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1544 ai = Aloc->i; 1545 aj = Aloc->j; 1546 a = Aloc->a; 1547 for (i = 0; i < mbs; i++) { 1548 rvals[0] = bs * (baij->rstartbs + i); 1549 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1550 for (j = ai[i]; j < ai[i + 1]; j++) { 1551 col = baij->garray[aj[j]] * bs; 1552 for (k = 0; k < bs; k++) { 1553 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1554 col++; 1555 a += bs; 1556 } 1557 } 1558 } 1559 PetscCall(PetscFree(rvals)); 1560 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1561 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1562 1563 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1564 else PetscCall(MatHeaderMerge(A, &B)); 1565 PetscFunctionReturn(PETSC_SUCCESS); 1566 } 1567 1568 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1569 { 1570 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1571 Mat a = baij->A, b = baij->B; 1572 PetscInt s1, s2, s3; 1573 1574 PetscFunctionBegin; 1575 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1576 if (rr) { 1577 PetscCall(VecGetLocalSize(rr, &s1)); 1578 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1579 /* Overlap communication with computation. 
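The forward scatter of rr into baij->lvec is only started here; the matching VecScatterEnd() is deferred until after the diagonal block has been scaled below, so off-process values arrive while the local work proceeds.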
*/ 1580 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1581 } 1582 if (ll) { 1583 PetscCall(VecGetLocalSize(ll, &s1)); 1584 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1585 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1586 } 1587 /* scale the diagonal block */ 1588 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1589 1590 if (rr) { 1591 /* Do a scatter end and then right scale the off-diagonal block */ 1592 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1593 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1594 } 1595 PetscFunctionReturn(PETSC_SUCCESS); 1596 } 1597 1598 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1599 { 1600 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1601 PetscInt *lrows; 1602 PetscInt r, len; 1603 PetscBool cong; 1604 1605 PetscFunctionBegin; 1606 /* get locally owned rows */ 1607 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1608 /* fix right-hand side if needed */ 1609 if (x && b) { 1610 const PetscScalar *xx; 1611 PetscScalar *bb; 1612 1613 PetscCall(VecGetArrayRead(x, &xx)); 1614 PetscCall(VecGetArray(b, &bb)); 1615 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1616 PetscCall(VecRestoreArrayRead(x, &xx)); 1617 PetscCall(VecRestoreArray(b, &bb)); 1618 } 1619 1620 /* actually zap the local rows */ 1621 /* 1622 Zero the required rows. If the "diagonal block" of the matrix 1623 is square and the user wishes to set the diagonal, we use a separate 1624 code path so that MatSetValues() is not called for each diagonal entry; 1625 each such call could allocate new memory, triggering many mallocs and slowing things down.
1626 1627 */ 1628 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1629 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1630 PetscCall(MatHasCongruentLayouts(A, &cong)); 1631 if ((diag != 0.0) && cong) { 1632 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1633 } else if (diag != 0.0) { 1634 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1635 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1636 for (r = 0; r < len; ++r) { 1637 const PetscInt row = lrows[r] + A->rmap->rstart; 1638 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1639 } 1640 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1641 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1642 } else { 1643 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1644 } 1645 PetscCall(PetscFree(lrows)); 1646 1647 /* only change matrix nonzero state if pattern was allowed to be changed */ 1648 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1649 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1650 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1651 } 1652 PetscFunctionReturn(PETSC_SUCCESS); 1653 } 1654 1655 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1656 { 1657 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1658 PetscMPIInt n, p = 0; 1659 PetscInt i, j, k, r, len = 0, row, col, count; 1660 PetscInt *lrows, *owners = A->rmap->range; 1661 PetscSFNode *rrows; 1662 PetscSF sf; 1663 const PetscScalar *xx; 1664 PetscScalar *bb, *mask; 1665 Vec xmask, lmask; 1666 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1667 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1668 PetscScalar *aa; 1669 1670 PetscFunctionBegin; 1671 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1672 /* Create SF where leaves are input rows and roots are owned rows */ 1673 PetscCall(PetscMalloc1(n, &lrows)); 1674 for (r = 0; r < n; ++r) lrows[r] = -1; 1675 PetscCall(PetscMalloc1(N, &rrows)); 1676 for (r = 0; r < N; ++r) { 1677 const PetscInt idx = rows[r]; 1678 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1679 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1680 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1681 } 1682 rrows[r].rank = p; 1683 rrows[r].index = rows[r] - owners[p]; 1684 } 1685 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1686 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1687 /* Collect flags for rows to be zeroed */ 1688 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1689 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1690 PetscCall(PetscSFDestroy(&sf)); 1691 /* Compress and put in row numbers */ 1692 for (r = 0; r < n; ++r) 1693 if (lrows[r] >= 0) lrows[len++] = r; 1694 /* zero diagonal part of matrix */ 1695 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1696 /* handle off-diagonal part of matrix */ 1697 
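/* Build a mask of the zeroed rows for the off-diagonal block: a vector with 1.0 in each zeroed local row is scattered forward through the matrix-vector communication pattern (Mvctx), so every process learns which of its ghost columns correspond to rows zeroed on other processes. */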
PetscCall(MatCreateVecs(A, &xmask, NULL)); 1698 PetscCall(VecDuplicate(l->lvec, &lmask)); 1699 PetscCall(VecGetArray(xmask, &bb)); 1700 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1701 PetscCall(VecRestoreArray(xmask, &bb)); 1702 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1703 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1704 PetscCall(VecDestroy(&xmask)); 1705 if (x) { 1706 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1707 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1708 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1709 PetscCall(VecGetArray(b, &bb)); 1710 } 1711 PetscCall(VecGetArray(lmask, &mask)); 1712 /* remove zeroed rows of off-diagonal matrix */ 1713 for (i = 0; i < len; ++i) { 1714 row = lrows[i]; 1715 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1716 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1717 for (k = 0; k < count; ++k) { 1718 aa[0] = 0.0; 1719 aa += bs; 1720 } 1721 } 1722 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1723 for (i = 0; i < l->B->rmap->N; ++i) { 1724 row = i / bs; 1725 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1726 for (k = 0; k < bs; ++k) { 1727 col = bs * baij->j[j] + k; 1728 if (PetscAbsScalar(mask[col])) { 1729 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1730 if (x) bb[i] -= aa[0] * xx[col]; 1731 aa[0] = 0.0; 1732 } 1733 } 1734 } 1735 } 1736 if (x) { 1737 PetscCall(VecRestoreArray(b, &bb)); 1738 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1739 } 1740 PetscCall(VecRestoreArray(lmask, &mask)); 1741 PetscCall(VecDestroy(&lmask)); 1742 PetscCall(PetscFree(lrows)); 1743 1744 /* only change matrix nonzero state if pattern was allowed to be changed */ 1745 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1746 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1747 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1748 } 1749 PetscFunctionReturn(PETSC_SUCCESS); 1750 } 1751 1752 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1753 { 1754 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1755 1756 PetscFunctionBegin; 1757 PetscCall(MatSetUnfactored(a->A)); 1758 PetscFunctionReturn(PETSC_SUCCESS); 1759 } 1760 1761 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1762 1763 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1764 { 1765 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1766 Mat a, b, c, d; 1767 PetscBool flg; 1768 1769 PetscFunctionBegin; 1770 a = matA->A; 1771 b = matA->B; 1772 c = matB->A; 1773 d = matB->B; 1774 1775 PetscCall(MatEqual(a, c, &flg)); 1776 if (flg) PetscCall(MatEqual(b, d, &flg)); 1777 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1778 PetscFunctionReturn(PETSC_SUCCESS); 1779 } 1780 1781 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1782 { 1783 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1784 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1785 1786 PetscFunctionBegin; 1787 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
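In that case the generic MatCopy_Basic() path below is used instead, copying the values row by row.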
*/ 1788 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1789 PetscCall(MatCopy_Basic(A, B, str)); 1790 } else { 1791 PetscCall(MatCopy(a->A, b->A, str)); 1792 PetscCall(MatCopy(a->B, b->B, str)); 1793 } 1794 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1795 PetscFunctionReturn(PETSC_SUCCESS); 1796 } 1797 1798 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1799 { 1800 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1801 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1802 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1803 1804 PetscFunctionBegin; 1805 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1806 PetscFunctionReturn(PETSC_SUCCESS); 1807 } 1808 1809 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1810 { 1811 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1812 PetscBLASInt bnz, one = 1; 1813 Mat_SeqBAIJ *x, *y; 1814 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1815 1816 PetscFunctionBegin; 1817 if (str == SAME_NONZERO_PATTERN) { 1818 PetscScalar alpha = a; 1819 x = (Mat_SeqBAIJ *)xx->A->data; 1820 y = (Mat_SeqBAIJ *)yy->A->data; 1821 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1822 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1823 x = (Mat_SeqBAIJ *)xx->B->data; 1824 y = (Mat_SeqBAIJ *)yy->B->data; 1825 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1826 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1827 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1828 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1829 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1830 } else { 1831 Mat B; 1832 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1833 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1834 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1835 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1836 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1837 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1838 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1839 PetscCall(MatSetType(B, MATMPIBAIJ)); 1840 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1841 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1842 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1843 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1844 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1845 PetscCall(MatHeaderMerge(Y, &B)); 1846 PetscCall(PetscFree(nnz_d)); 1847 PetscCall(PetscFree(nnz_o)); 1848 } 1849 PetscFunctionReturn(PETSC_SUCCESS); 1850 } 1851 1852 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1853 { 1854 PetscFunctionBegin; 1855 if (PetscDefined(USE_COMPLEX)) { 1856 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1857 1858 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1859 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1860 } 1861 PetscFunctionReturn(PETSC_SUCCESS); 1862 } 1863 1864 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1865 { 1866 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1867 1868 PetscFunctionBegin; 1869 PetscCall(MatRealPart(a->A)); 1870 PetscCall(MatRealPart(a->B)); 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1875 { 1876 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1877 1878 PetscFunctionBegin; 1879 PetscCall(MatImaginaryPart(a->A)); 1880 PetscCall(MatImaginaryPart(a->B)); 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } 1883 1884 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1885 { 1886 IS iscol_local; 1887 PetscInt csize; 1888 1889 PetscFunctionBegin; 1890 PetscCall(ISGetLocalSize(iscol, &csize)); 1891 if (call == MAT_REUSE_MATRIX) { 1892 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1893 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1894 } else { 1895 PetscCall(ISAllGather(iscol, &iscol_local)); 1896 } 1897 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1898 if (call == MAT_INITIAL_MATRIX) { 1899 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1900 PetscCall(ISDestroy(&iscol_local)); 1901 } 1902 PetscFunctionReturn(PETSC_SUCCESS); 1903 } 1904 1905 /* 1906 Not great since it makes two copies of the submatrix: first a SeqBAIJ 1907 submatrix on each process, and then the final result assembled by concatenating the local matrices. 1908 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1909 This routine is used for both BAIJ and SBAIJ matrices (an unfortunate dependency). 1910 */ 1911 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1912 { 1913 PetscMPIInt rank, size; 1914 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1915 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1916 Mat M, Mreuse; 1917 MatScalar *vwork, *aa; 1918 MPI_Comm comm; 1919 IS isrow_new, iscol_new; 1920 Mat_SeqBAIJ *aij; 1921 1922 PetscFunctionBegin; 1923 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1924 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1925 PetscCallMPI(MPI_Comm_size(comm, &size)); 1926 /* The compression and expansion should be avoided. It does not report 1927 errors, it may change the indices, and hence it is buggy. */ 1928 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1929 if (isrow == iscol) { 1930 iscol_new = isrow_new; 1931 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1932 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1933 1934 if (call == MAT_REUSE_MATRIX) { 1935 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1936 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1937 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1938 } else { 1939 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1940 } 1941 PetscCall(ISDestroy(&isrow_new)); 1942 PetscCall(ISDestroy(&iscol_new)); 1943 /* 1944 m - number of local rows 1945 n - number of columns (same on all processors) 1946 rstart - first row in new global matrix generated 1947 */ 1948 PetscCall(MatGetBlockSize(mat, &bs)); 1949 PetscCall(MatGetSize(Mreuse, &m, &n)); 1950 m = m / bs; 1951 n = n / bs; 1952 1953 if (call == MAT_INITIAL_MATRIX) { 1954 aij = (Mat_SeqBAIJ *)Mreuse->data; 1955 ii = aij->i; 1956 jj = aij->j; 1957 1958 /* 1959 Determine the number of non-zeros in the diagonal and off-diagonal 1960 portions of the matrix in order to do correct preallocation 1961 */ 1962 1963 /* first get start and end of "diagonal" columns */ 1964 if (csize == PETSC_DECIDE) { 1965 PetscCall(ISGetSize(isrow, &mglobal)); 1966 if (mglobal == n * bs) { /* square matrix */ 1967 nlocal = m; 1968 } else { 1969 nlocal = n / size + ((n % size) > rank); 1970 } 1971 } else { 1972 nlocal = csize / bs; 1973 } 1974 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1975 rstart = rend - nlocal; 1976 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1977 1978 /* next, compute all the lengths */ 1979 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1980 for (i = 0; i < m; i++) { 1981 jend = ii[i + 1] - ii[i]; 1982 olen = 0; 1983 dlen = 0; 1984 for (j = 0; j < jend; j++) { 1985 if (*jj < rstart || *jj >= rend) olen++; 1986 else dlen++; 1987 jj++; 1988 } 1989 olens[i] = olen; 1990 dlens[i] = dlen; 1991 } 1992 PetscCall(MatCreate(comm, &M)); 1993 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1994 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 1995 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1996 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1997 PetscCall(PetscFree2(dlens, olens)); 1998 } else { 1999 PetscInt ml, nl; 2000 2001 M = *newmat; 2002 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2003 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2004 PetscCall(MatZeroEntries(M)); 2005 /* 2006 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2007 rather than the slower MatSetValues().
2008 */ 2009 M->was_assembled = PETSC_TRUE; 2010 M->assembled = PETSC_FALSE; 2011 } 2012 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2013 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2014 aij = (Mat_SeqBAIJ *)Mreuse->data; 2015 ii = aij->i; 2016 jj = aij->j; 2017 aa = aij->a; 2018 for (i = 0; i < m; i++) { 2019 row = rstart / bs + i; 2020 nz = ii[i + 1] - ii[i]; 2021 cwork = jj; 2022 jj = PetscSafePointerPlusOffset(jj, nz); 2023 vwork = aa; 2024 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2025 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2026 } 2027 2028 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2029 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2030 *newmat = M; 2031 2032 /* save submatrix used in processor for next request */ 2033 if (call == MAT_INITIAL_MATRIX) { 2034 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2035 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2036 } 2037 PetscFunctionReturn(PETSC_SUCCESS); 2038 } 2039 2040 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2041 { 2042 MPI_Comm comm, pcomm; 2043 PetscInt clocal_size, nrows; 2044 const PetscInt *rows; 2045 PetscMPIInt size; 2046 IS crowp, lcolp; 2047 2048 PetscFunctionBegin; 2049 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2050 /* make a collective version of 'rowp' */ 2051 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2052 if (pcomm == comm) { 2053 crowp = rowp; 2054 } else { 2055 PetscCall(ISGetSize(rowp, &nrows)); 2056 PetscCall(ISGetIndices(rowp, &rows)); 2057 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2058 PetscCall(ISRestoreIndices(rowp, &rows)); 2059 } 2060 PetscCall(ISSetPermutation(crowp)); 2061 /* make a local version of 'colp' */ 2062 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2063 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2064 if (size == 1) { 2065 lcolp = colp; 2066 } else { 2067 PetscCall(ISAllGather(colp, &lcolp)); 2068 } 2069 PetscCall(ISSetPermutation(lcolp)); 2070 /* now we just get the submatrix */ 2071 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2072 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2073 /* clean up */ 2074 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2075 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2076 PetscFunctionReturn(PETSC_SUCCESS); 2077 } 2078 2079 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2080 { 2081 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2082 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2083 2084 PetscFunctionBegin; 2085 if (nghosts) *nghosts = B->nbs; 2086 if (ghosts) *ghosts = baij->garray; 2087 PetscFunctionReturn(PETSC_SUCCESS); 2088 } 2089 2090 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2091 { 2092 Mat B; 2093 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2094 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2095 Mat_SeqAIJ *b; 2096 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2097 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2098 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2099 2100 PetscFunctionBegin; 2101 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2102 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2103 2104 /* Tell every 
processor the number of nonzeros per row */ 2105 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2106 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2107 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2108 displs = recvcounts + size; 2109 for (i = 0; i < size; i++) { 2110 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2111 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2112 } 2113 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2114 /* Create the sequential matrix of the same type as the local block diagonal */ 2115 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2116 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2117 PetscCall(MatSetType(B, MATSEQAIJ)); 2118 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2119 b = (Mat_SeqAIJ *)B->data; 2120 2121 /* Copy my part of matrix column indices over */ 2122 sendcount = ad->nz + bd->nz; 2123 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2124 a_jsendbuf = ad->j; 2125 b_jsendbuf = bd->j; 2126 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2127 cnt = 0; 2128 for (i = 0; i < n; i++) { 2129 /* put in lower diagonal portion */ 2130 m = bd->i[i + 1] - bd->i[i]; 2131 while (m > 0) { 2132 /* is it above diagonal (in bd (compressed) numbering) */ 2133 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2134 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2135 m--; 2136 } 2137 2138 /* put in diagonal portion */ 2139 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2140 2141 /* put in upper diagonal portion */ 2142 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2143 } 2144 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2145 2146 /* Gather all column indices to all processors */ 2147 for (i = 0; i < size; i++) { 2148 recvcounts[i] = 0; 2149 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2150 } 2151 displs[0] = 0; 2152 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2153 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2154 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2155 /* set the b->ilen (length of each row) values */ 2156 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2157 /* set the b->i indices */ 2158 b->i[0] = 0; 2159 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2160 PetscCall(PetscFree(lens)); 2161 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2162 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2163 PetscCall(PetscFree(recvcounts)); 2164 2165 PetscCall(MatPropagateSymmetryOptions(A, B)); 2166 *newmat = B; 2167 PetscFunctionReturn(PETSC_SUCCESS); 2168 } 2169 2170 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2171 { 2172 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2173 Vec bb1 = NULL; 2174 2175 PetscFunctionBegin; 2176 if (flag == SOR_APPLY_UPPER) { 2177 
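/* applying the upper triangular part involves only the local diagonal block, so delegate to it directly; no ghost-value scatter is needed */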
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2182 2183 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2184 if (flag & SOR_ZERO_INITIAL_GUESS) { 2185 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2186 its--; 2187 } 2188 2189 while (its--) { 2190 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2191 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2192 2193 /* update rhs: bb1 = bb - B*x */ 2194 PetscCall(VecScale(mat->lvec, -1.0)); 2195 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2196 2197 /* local sweep */ 2198 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2199 } 2200 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2201 if (flag & SOR_ZERO_INITIAL_GUESS) { 2202 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2203 its--; 2204 } 2205 while (its--) { 2206 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2207 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2208 2209 /* update rhs: bb1 = bb - B*x */ 2210 PetscCall(VecScale(mat->lvec, -1.0)); 2211 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2212 2213 /* local sweep */ 2214 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2215 } 2216 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2217 if (flag & SOR_ZERO_INITIAL_GUESS) { 2218 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2219 its--; 2220 } 2221 while (its--) { 2222 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2223 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2224 2225 /* update rhs: bb1 = bb - B*x */ 2226 PetscCall(VecScale(mat->lvec, -1.0)); 2227 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2228 2229 /* local sweep */ 2230 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2231 } 2232 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2233 2234 PetscCall(VecDestroy(&bb1)); 2235 PetscFunctionReturn(PETSC_SUCCESS); 2236 } 2237 2238 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2239 { 2240 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2241 PetscInt m, N, i, *garray = aij->garray; 2242 PetscInt ib, jb, bs = A->rmap->bs; 2243 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2244 MatScalar *a_val = a_aij->a; 2245 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2246 MatScalar *b_val = b_aij->a; 2247 PetscReal *work; 2248 2249 PetscFunctionBegin; 2250 PetscCall(MatGetSize(A, &m, &N)); 2251 PetscCall(PetscCalloc1(N, &work)); 2252 if (type == NORM_2) { 2253 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2254 for (jb = 0; jb < bs; jb++) { 2255 for (ib = 0; ib < bs; ib++) { 2256 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2257 a_val++; 2258 } 2259 } 2260 } 2261 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2262 for (jb = 0; jb < bs; jb++) { 2263 for (ib = 0; ib < bs; ib++) { 2264 work[garray[b_aij->j[i]] * bs + jb] += 
PetscAbsScalar(*b_val * *b_val); 2265 b_val++; 2266 } 2267 } 2268 } 2269 } else if (type == NORM_1) { 2270 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2271 for (jb = 0; jb < bs; jb++) { 2272 for (ib = 0; ib < bs; ib++) { 2273 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2274 a_val++; 2275 } 2276 } 2277 } 2278 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2279 for (jb = 0; jb < bs; jb++) { 2280 for (ib = 0; ib < bs; ib++) { 2281 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2282 b_val++; 2283 } 2284 } 2285 } 2286 } else if (type == NORM_INFINITY) { 2287 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2288 for (jb = 0; jb < bs; jb++) { 2289 for (ib = 0; ib < bs; ib++) { 2290 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2291 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2292 a_val++; 2293 } 2294 } 2295 } 2296 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2297 for (jb = 0; jb < bs; jb++) { 2298 for (ib = 0; ib < bs; ib++) { 2299 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2300 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2301 b_val++; 2302 } 2303 } 2304 } 2305 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2306 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2307 for (jb = 0; jb < bs; jb++) { 2308 for (ib = 0; ib < bs; ib++) { 2309 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2310 a_val++; 2311 } 2312 } 2313 } 2314 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2315 for (jb = 0; jb < bs; jb++) { 2316 for (ib = 0; ib < bs; ib++) { 2317 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2318 b_val++; 2319 } 2320 } 2321 } 2322 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2323 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2324 for (jb = 0; jb < bs; jb++) { 2325 for (ib = 0; ib < bs; ib++) { 2326 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2327 a_val++; 2328 } 2329 } 2330 } 2331 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2332 for (jb = 0; jb < bs; jb++) { 2333 for (ib = 0; ib < bs; ib++) { 2334 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2335 b_val++; 2336 } 2337 } 2338 } 2339 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2340 if (type == NORM_INFINITY) { 2341 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2342 } else { 2343 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2344 } 2345 PetscCall(PetscFree(work)); 2346 if (type == NORM_2) { 2347 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2348 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2349 for (i = 0; i < N; i++) reductions[i] /= m; 2350 } 2351 PetscFunctionReturn(PETSC_SUCCESS); 2352 } 2353 2354 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2355 { 2356 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2357 2358 PetscFunctionBegin; 2359 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2360 A->factorerrortype = a->A->factorerrortype; 2361 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2362 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2363 
PetscFunctionReturn(PETSC_SUCCESS); 2364 } 2365 2366 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2367 { 2368 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2369 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2370 2371 PetscFunctionBegin; 2372 if (!Y->preallocated) { 2373 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2374 } else if (!aij->nz) { 2375 PetscInt nonew = aij->nonew; 2376 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2377 aij->nonew = nonew; 2378 } 2379 PetscCall(MatShift_Basic(Y, a)); 2380 PetscFunctionReturn(PETSC_SUCCESS); 2381 } 2382 2383 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2384 { 2385 PetscFunctionBegin; 2386 *a = ((Mat_MPIBAIJ *)A->data)->A; 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2391 { 2392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2393 2394 PetscFunctionBegin; 2395 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2396 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2397 PetscFunctionReturn(PETSC_SUCCESS); 2398 } 2399 2400 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2401 MatGetRow_MPIBAIJ, 2402 MatRestoreRow_MPIBAIJ, 2403 MatMult_MPIBAIJ, 2404 /* 4*/ MatMultAdd_MPIBAIJ, 2405 MatMultTranspose_MPIBAIJ, 2406 MatMultTransposeAdd_MPIBAIJ, 2407 NULL, 2408 NULL, 2409 NULL, 2410 /*10*/ NULL, 2411 NULL, 2412 NULL, 2413 MatSOR_MPIBAIJ, 2414 MatTranspose_MPIBAIJ, 2415 /*15*/ MatGetInfo_MPIBAIJ, 2416 MatEqual_MPIBAIJ, 2417 MatGetDiagonal_MPIBAIJ, 2418 MatDiagonalScale_MPIBAIJ, 2419 MatNorm_MPIBAIJ, 2420 /*20*/ MatAssemblyBegin_MPIBAIJ, 2421 MatAssemblyEnd_MPIBAIJ, 2422 MatSetOption_MPIBAIJ, 2423 MatZeroEntries_MPIBAIJ, 2424 /*24*/ MatZeroRows_MPIBAIJ, 2425 NULL, 2426 NULL, 2427 NULL, 2428 NULL, 2429 /*29*/ MatSetUp_MPI_Hash, 2430 NULL, 2431 NULL, 2432 MatGetDiagonalBlock_MPIBAIJ, 2433 NULL, 2434 /*34*/ MatDuplicate_MPIBAIJ, 2435 NULL, 2436 NULL, 2437 NULL, 2438 NULL, 2439 /*39*/ MatAXPY_MPIBAIJ, 2440 MatCreateSubMatrices_MPIBAIJ, 2441 MatIncreaseOverlap_MPIBAIJ, 2442 MatGetValues_MPIBAIJ, 2443 MatCopy_MPIBAIJ, 2444 /*44*/ NULL, 2445 MatScale_MPIBAIJ, 2446 MatShift_MPIBAIJ, 2447 NULL, 2448 MatZeroRowsColumns_MPIBAIJ, 2449 /*49*/ NULL, 2450 NULL, 2451 NULL, 2452 NULL, 2453 NULL, 2454 /*54*/ MatFDColoringCreate_MPIXAIJ, 2455 NULL, 2456 MatSetUnfactored_MPIBAIJ, 2457 MatPermute_MPIBAIJ, 2458 MatSetValuesBlocked_MPIBAIJ, 2459 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2460 MatDestroy_MPIBAIJ, 2461 MatView_MPIBAIJ, 2462 NULL, 2463 NULL, 2464 /*64*/ NULL, 2465 NULL, 2466 NULL, 2467 NULL, 2468 MatGetRowMaxAbs_MPIBAIJ, 2469 /*69*/ NULL, 2470 NULL, 2471 NULL, 2472 MatFDColoringApply_BAIJ, 2473 NULL, 2474 /*74*/ NULL, 2475 NULL, 2476 NULL, 2477 NULL, 2478 MatLoad_MPIBAIJ, 2479 /*79*/ NULL, 2480 NULL, 2481 NULL, 2482 NULL, 2483 NULL, 2484 /*84*/ NULL, 2485 NULL, 2486 NULL, 2487 NULL, 2488 NULL, 2489 /*89*/ NULL, 2490 NULL, 2491 NULL, 2492 NULL, 2493 MatConjugate_MPIBAIJ, 2494 /*94*/ NULL, 2495 NULL, 2496 MatRealPart_MPIBAIJ, 2497 MatImaginaryPart_MPIBAIJ, 2498 NULL, 2499 /*99*/ NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 NULL, 2504 /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2505 NULL, 2506 MatGetGhosts_MPIBAIJ, 2507 NULL, 2508 NULL, 2509 /*109*/ NULL, 2510 NULL, 2511 NULL, 2512 NULL, 2513 MatGetMultiProcBlock_MPIBAIJ, 2514 /*114*/ NULL, 2515 MatGetColumnReductions_MPIBAIJ, 2516 MatInvertBlockDiagonal_MPIBAIJ, 
2517 NULL, 2518 NULL, 2519 /*119*/ NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 NULL, 2524 /*124*/ NULL, 2525 NULL, 2526 MatSetBlockSizes_Default, 2527 NULL, 2528 MatFDColoringSetUp_MPIXAIJ, 2529 /*129*/ NULL, 2530 MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2531 NULL, 2532 NULL, 2533 NULL, 2534 /*134*/ NULL, 2535 NULL, 2536 MatEliminateZeros_MPIBAIJ, 2537 MatGetRowSumAbs_MPIBAIJ, 2538 NULL, 2539 /*139*/ NULL, 2540 NULL, 2541 MatCopyHashToXAIJ_MPI_Hash, 2542 NULL, 2543 NULL}; 2544 2545 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2546 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2547 2548 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2549 { 2550 PetscInt m, rstart, cstart, cend; 2551 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2552 const PetscInt *JJ = NULL; 2553 PetscScalar *values = NULL; 2554 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2555 PetscBool nooffprocentries; 2556 2557 PetscFunctionBegin; 2558 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2559 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2560 PetscCall(PetscLayoutSetUp(B->rmap)); 2561 PetscCall(PetscLayoutSetUp(B->cmap)); 2562 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2563 m = B->rmap->n / bs; 2564 rstart = B->rmap->rstart / bs; 2565 cstart = B->cmap->rstart / bs; 2566 cend = B->cmap->rend / bs; 2567 2568 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2569 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2570 for (i = 0; i < m; i++) { 2571 nz = ii[i + 1] - ii[i]; 2572 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2573 nz_max = PetscMax(nz_max, nz); 2574 dlen = 0; 2575 olen = 0; 2576 JJ = jj + ii[i]; 2577 for (j = 0; j < nz; j++) { 2578 if (*JJ < cstart || *JJ >= cend) olen++; 2579 else dlen++; 2580 JJ++; 2581 } 2582 d_nnz[i] = dlen; 2583 o_nnz[i] = olen; 2584 } 2585 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2586 PetscCall(PetscFree2(d_nnz, o_nnz)); 2587 2588 values = (PetscScalar *)V; 2589 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2590 for (i = 0; i < m; i++) { 2591 PetscInt row = i + rstart; 2592 PetscInt ncols = ii[i + 1] - ii[i]; 2593 const PetscInt *icols = jj + ii[i]; 2594 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2595 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2596 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2597 } else { /* block ordering does not match so we can only insert one block at a time. */ 2598 PetscInt j; 2599 for (j = 0; j < ncols; j++) { 2600 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2601 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2602 } 2603 } 2604 } 2605 2606 if (!V) PetscCall(PetscFree(values)); 2607 nooffprocentries = B->nooffprocentries; 2608 B->nooffprocentries = PETSC_TRUE; 2609 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2610 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2611 B->nooffprocentries = nooffprocentries; 2612 2613 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2614 PetscFunctionReturn(PETSC_SUCCESS); 2615 } 2616 2617 /*@C 2618 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2619 2620 Collective 2621 2622 Input Parameters: 2623 + B - the matrix 2624 . bs - the block size 2625 . i - the indices into `j` for the start of each local row (starts with zero) 2626 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2627 - v - optional values in the matrix, use `NULL` if not provided 2628 2629 Level: advanced 2630 2631 Notes: 2632 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2633 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2634 called this routine. 2635 2636 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2637 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2638 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2639 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2640 block column and the second index is over columns within a block. 
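   For example, a minimal sketch (the communicator and sizes are illustrative) that creates a matrix with two local block rows of 2x2 blocks, whose sorted block column indices are given in `j`:
.vb
  Mat      B;
  PetscInt bs   = 2;
  PetscInt i[3] = {0, 2, 4};    // block row pointers: row 0 owns j[0..1], row 1 owns j[2..3]
  PetscInt j[4] = {0, 1, 0, 1}; // global block column indices, sorted within each row

  MatCreate(PETSC_COMM_WORLD, &B);
  MatSetSizes(B, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE);
  MatSetType(B, MATMPIBAIJ);
  MatMPIBAIJSetPreallocationCSR(B, bs, i, j, NULL); // NULL v: the structure is set and the values start as zero
.ve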
2641 2642 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2643 2644 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2645 @*/ 2646 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2647 { 2648 PetscFunctionBegin; 2649 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2650 PetscValidType(B, 1); 2651 PetscValidLogicalCollectiveInt(B, bs, 2); 2652 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2653 PetscFunctionReturn(PETSC_SUCCESS); 2654 } 2655 2656 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2657 { 2658 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2659 PetscInt i; 2660 PetscMPIInt size; 2661 2662 PetscFunctionBegin; 2663 if (B->hash_active) { 2664 B->ops[0] = b->cops; 2665 B->hash_active = PETSC_FALSE; 2666 } 2667 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2668 PetscCall(MatSetBlockSize(B, bs)); 2669 PetscCall(PetscLayoutSetUp(B->rmap)); 2670 PetscCall(PetscLayoutSetUp(B->cmap)); 2671 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2672 2673 if (d_nnz) { 2674 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2675 } 2676 if (o_nnz) { 2677 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2678 } 2679 2680 b->bs2 = bs * bs; 2681 b->mbs = B->rmap->n / bs; 2682 b->nbs = B->cmap->n / bs; 2683 b->Mbs = B->rmap->N / bs; 2684 b->Nbs = B->cmap->N / bs; 2685 2686 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2687 b->rstartbs = B->rmap->rstart / bs; 2688 b->rendbs = B->rmap->rend / bs; 2689 b->cstartbs = B->cmap->rstart / bs; 2690 b->cendbs = B->cmap->rend / bs; 2691 2692 #if defined(PETSC_USE_CTABLE) 2693 PetscCall(PetscHMapIDestroy(&b->colmap)); 2694 #else 2695 PetscCall(PetscFree(b->colmap)); 2696 #endif 2697 PetscCall(PetscFree(b->garray)); 2698 PetscCall(VecDestroy(&b->lvec)); 2699 PetscCall(VecScatterDestroy(&b->Mvctx)); 2700 2701 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2702 2703 MatSeqXAIJGetOptions_Private(b->B); 2704 PetscCall(MatDestroy(&b->B)); 2705 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2706 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2707 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2708 MatSeqXAIJRestoreOptions_Private(b->B); 2709 2710 MatSeqXAIJGetOptions_Private(b->A); 2711 PetscCall(MatDestroy(&b->A)); 2712 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2713 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2714 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2715 MatSeqXAIJRestoreOptions_Private(b->A); 2716 2717 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2718 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2719 B->preallocated = PETSC_TRUE; 2720 B->was_assembled = PETSC_FALSE; 2721 B->assembled = PETSC_FALSE; 2722 PetscFunctionReturn(PETSC_SUCCESS); 2723 } 2724 2725 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2726 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2727 2728 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2729 { 2730 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2731 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2732 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2733 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2734 2735 PetscFunctionBegin; 2736 PetscCall(PetscMalloc1(M + 1, &ii)); 2737 ii[0] = 0; 2738 for (i = 0; i < M; i++) { 2739 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2740 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2741 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2742 /* remove one from the count if the row contains a diagonal entry */ 2743 for (j = id[i]; j < id[i + 1]; j++) { 2744 if (jd[j] == i) { 2745 ii[i + 1]--; 2746 break; 2747 } 2748 } 2749 } 2750 PetscCall(PetscMalloc1(ii[M], &jj)); 2751 cnt = 0; 2752 for (i = 0; i < M; i++) { 2753 for (j = io[i]; j < io[i + 1]; j++) { 2754 if (garray[jo[j]] > rstart) break; 2755 jj[cnt++] = garray[jo[j]]; 2756 } 2757 for (k = id[i]; k < id[i + 1]; k++) { 2758 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2759 } 2760 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2761 } 2762 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2767 2768 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2769 2770 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2771 { 2772 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2773 Mat_MPIAIJ *b; 2774 Mat B; 2775 2776 PetscFunctionBegin; 2777 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2778 2779 if (reuse == MAT_REUSE_MATRIX) { 2780 B = *newmat; 2781 } else { 2782 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2783 PetscCall(MatSetType(B, MATMPIAIJ)); 2784 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2785 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2786 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2787 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2788 } 2789 b = (Mat_MPIAIJ *)B->data; 2790 2791 if (reuse == MAT_REUSE_MATRIX) { 2792
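/* reuse: the target's SeqAIJ blocks already exist, so convert the BAIJ blocks into them in place */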
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2793 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2794 } else { 2795 PetscInt *garray = a->garray; 2796 Mat_SeqAIJ *bB; 2797 PetscInt bs, nnz; 2798 PetscCall(MatDestroy(&b->A)); 2799 PetscCall(MatDestroy(&b->B)); 2800 /* just clear out the data structure */ 2801 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2802 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2803 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2804 2805 /* Global numbering for b->B columns */ 2806 bB = (Mat_SeqAIJ *)b->B->data; 2807 bs = A->rmap->bs; 2808 nnz = bB->i[A->rmap->n]; 2809 for (PetscInt k = 0; k < nnz; k++) { 2810 PetscInt bj = bB->j[k] / bs; 2811 PetscInt br = bB->j[k] % bs; 2812 bB->j[k] = garray[bj] * bs + br; 2813 } 2814 } 2815 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2816 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2817 2818 if (reuse == MAT_INPLACE_MATRIX) { 2819 PetscCall(MatHeaderReplace(A, &B)); 2820 } else { 2821 *newmat = B; 2822 } 2823 PetscFunctionReturn(PETSC_SUCCESS); 2824 } 2825 2826 /*MC 2827 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2828 2829 Options Database Keys: 2830 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2831 . -mat_block_size <bs> - set the blocksize used to store the matrix 2832 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2833 - -mat_use_hash_table <fact> - set hash table factor 2834 2835 Level: beginner 2836 2837 Note: 2838 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this case, no 2839 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored. 2840 2841 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2842 M*/ 2843 2844 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2845 2846 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2847 { 2848 Mat_MPIBAIJ *b; 2849 PetscBool flg = PETSC_FALSE; 2850 2851 PetscFunctionBegin; 2852 PetscCall(PetscNew(&b)); 2853 B->data = (void *)b; 2854 B->ops[0] = MatOps_Values; 2855 B->assembled = PETSC_FALSE; 2856 2857 B->insertmode = NOT_SET_VALUES; 2858 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2859 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2860 2861 /* build local table of row and column ownerships */ 2862 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2863 2864 /* build cache for off array entries formed */ 2865 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2866 2867 b->donotstash = PETSC_FALSE; 2868 b->colmap = NULL; 2869 b->garray = NULL; 2870 b->roworiented = PETSC_TRUE; 2871 2872 /* stuff used in block assembly */ 2873 b->barray = NULL; 2874 2875 /* stuff used for matrix vector multiply */ 2876 b->lvec = NULL; 2877 b->Mvctx = NULL; 2878 2879 /* stuff for MatGetRow() */ 2880 b->rowindices = NULL; 2881 b->rowvalues = NULL; 2882 b->getrowactive = PETSC_FALSE; 2883 2884 /* hash table stuff */ 2885 b->ht = NULL; 2886 b->hd = NULL; 2887 b->ht_size = 0; 2888 b->ht_flag = PETSC_FALSE; 2889 b->ht_fact = 0; 2890 b->ht_total_ct = 0; 2891 b->ht_insert_ct = 0; 2892 2893 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2894 b->ijonly = PETSC_FALSE; 2895 2896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2899 #if defined(PETSC_HAVE_HYPRE) 2900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2901 #endif 2902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2909 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2910 2911 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2912 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2913 if (flg) { 2914 PetscReal fact = 1.39; 2915 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2916 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2917 if (fact <= 1.0) fact = 1.39; 2918 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2919 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2920 } 2921 PetscOptionsEnd(); 2922 PetscFunctionReturn(PETSC_SUCCESS); 2923 } 2924 2925 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2926 /*MC 2927 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2928 2929 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2930 and `MATMPIBAIJ` otherwise. 2931 2932 Options Database Keys: 2933 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2934 2935 Level: beginner 2936 2937 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2938 M*/ 2939 2940 /*@ 2941 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2942 (block compressed row). 2943 2944 Collective 2945 2946 Input Parameters: 2947 + B - the matrix 2948 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2949 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2950 . d_nz - number of block nonzeros per block row in diagonal portion of local 2951 submatrix (same for all local rows) 2952 . d_nnz - array containing the number of block nonzeros in the various block rows 2953 in the diagonal portion of the local submatrix (possibly different for each block 2954 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2955 set it even if it is zero. 2956 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2957 submatrix (same for all local rows). 2958 - o_nnz - array containing the number of nonzeros in the various block rows of the 2959 off-diagonal portion of the local submatrix (possibly different for 2960 each block row) or `NULL`. 2961 2962 If the *_nnz parameter is given then the *_nz parameter is ignored 2963 2964 Options Database Keys: 2965 + -mat_block_size - size of the blocks to use 2966 - -mat_use_hash_table <fact> - set hash table factor 2967 2968 Level: intermediate 2969 2970 Notes: 2971 For good matrix assembly performance 2972 the user should preallocate the matrix storage by setting the parameters 2973 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 2974 performance can be increased by more than a factor of 50. 2975 2976 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2977 then it must be used on all processors that share the object for that argument. 2978 2979 Storage Information: 2980 For a square global matrix we define each processor's diagonal portion 2981 to be its local rows and the corresponding columns (a square submatrix); 2982 each processor's off-diagonal portion encompasses the remainder of the 2983 local matrix (a rectangular submatrix). 2984 2985 The user can specify preallocated storage for the diagonal part of 2986 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2987 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2988 memory allocation.

   Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields `mallocs`, `nz_allocated`, `nz_used`, and `nz_unneeded`.
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
.  d_nz - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or `NULL`.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of calling this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   A nonzero block is any block that has 1 or more nonzeros in it.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.
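
   As a concrete sketch (illustrative values, not prescribed by this routine): with a block size of 3
   the three rows above form a single block row containing one d block (columns 3-5) and three o blocks,
   so if the 12 x 12 matrix were spread over four processes owning three rows each, the call might be
.vb
   PetscCall(MatCreateBAIJ(comm, 3, 3, 3, 12, 12, 1, NULL, 3, NULL, &A));
.ve
   where `comm` is the communicator and `A` the `Mat` being created.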

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
          `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Options Database Key:
.  -mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

// PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

   Collective

   Input Parameters:
+  matin - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - `INSERT_VALUES` or `ADD_VALUES`

   Level: advanced

   Developer Notes:
   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given); this value
       should be the same as the local size used in creating the x vector for the matrix-vector
       product $ y = Ax $. For square matrices `n` is almost always `m`.
.  M - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row `row-1` of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
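
   For example, the following sketch (illustrative values, run on a single MPI rank) builds the
   2 x 2 matrix [1 2; 0 3] with `bs` = 1:
.vb
   PetscInt    i[] = {0, 2, 3};       /* row 0 has 2 entries, row 1 has 1 entry */
   PetscInt    j[] = {0, 1, 1};       /* column indices of those entries */
   PetscScalar a[] = {1.0, 2.0, 3.0}; /* values, in the same order as j */
   Mat         A;

   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, 2, 2, i, j, a, &A));
.ve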

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
    PetscInt    *dnz, *onz, mbs, Nbs, nbs;
    PetscInt    *bindx, rmax = a->rmax, j;
    PetscMPIInt  rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}