1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *vv; 62 Vec vB, vA; 63 const PetscScalar *va, *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayRead(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(v, &vv)); 92 PetscCall(VecRestoreArrayRead(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)B->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 690 { 691 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 692 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 693 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 694 PetscReal sum = 0.0; 695 MatScalar *v; 696 697 PetscFunctionBegin; 698 if (baij->size == 1) { 699 PetscCall(MatNorm(baij->A, type, nrm)); 700 } else { 701 if (type == NORM_FROBENIUS) { 702 v = amat->a; 703 nz = amat->nz * bs2; 704 for (i = 0; i < nz; i++) { 705 sum += PetscRealPart(PetscConj(*v) * (*v)); 706 v++; 707 } 708 v = bmat->a; 709 nz = bmat->nz * bs2; 710 for (i = 0; i < nz; i++) { 711 sum += PetscRealPart(PetscConj(*v) * (*v)); 712 v++; 713 } 714 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 715 *nrm = PetscSqrtReal(*nrm); 716 } else if (type == NORM_1) { /* max column sum */ 717 PetscReal *tmp; 718 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 719 720 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 721 v = amat->a; 722 jj = amat->j; 723 for (i = 0; i < amat->nz; i++) { 724 for (j = 0; j < bs; j++) { 725 col = bs * (cstart + *jj) + j; /* column index */ 726 for (row = 0; row < bs; row++) { 727 tmp[col] += PetscAbsScalar(*v); 728 v++; 729 } 730 } 731 jj++; 732 } 733 v = bmat->a; 734 jj = bmat->j; 735 for (i = 0; i < bmat->nz; i++) { 736 for (j = 0; j < bs; j++) { 737 col = bs * garray[*jj] + j; 738 for (row = 0; row < bs; row++) { 739 tmp[col] += PetscAbsScalar(*v); 740 v++; 741 } 742 } 743 jj++; 744 } 745 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 746 *nrm = 0.0; 747 for (j = 0; j < mat->cmap->N; j++) { 748 if (tmp[j] > *nrm) *nrm = tmp[j]; 749 } 750 PetscCall(PetscFree(tmp)); 751 } else if (type == NORM_INFINITY) { /* max row sum */ 752 PetscReal *sums; 753 PetscCall(PetscMalloc1(bs, &sums)); 754 sum = 0.0; 755 for (j = 0; j < amat->mbs; j++) { 756 for (row = 0; row < bs; row++) sums[row] = 0.0; 757 v = amat->a + bs2 * amat->i[j]; 758 nz = amat->i[j + 1] - amat->i[j]; 759 for (i = 0; i < nz; i++) { 760 for (col = 0; col < bs; col++) { 761 for (row = 0; row < bs; row++) { 762 sums[row] += PetscAbsScalar(*v); 763 v++; 764 } 765 } 766 } 767 v = bmat->a + bs2 * bmat->i[j]; 768 nz = bmat->i[j + 1] - bmat->i[j]; 769 for (i = 0; i < nz; i++) { 770 for (col = 0; col < bs; col++) { 771 for (row = 0; row < bs; row++) { 772 sums[row] += PetscAbsScalar(*v); 773 v++; 774 } 775 } 776 } 777 for (row = 0; row < bs; row++) { 778 if (sums[row] > sum) sum = sums[row]; 779 } 780 } 781 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 782 PetscCall(PetscFree(sums)); 783 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 784 } 785 PetscFunctionReturn(PETSC_SUCCESS); 786 } 787 788 /* 789 Creates the hash table, and sets the table 790 This table is created only once. 791 If new entries need to be added to the matrix 792 then the hash table has to be destroyed and 793 recreated. 794 */ 795 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 796 { 797 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 798 Mat A = baij->A, B = baij->B; 799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 800 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 801 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 802 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 803 PetscInt *HT, key; 804 MatScalar **HD; 805 PetscReal tmp; 806 #if defined(PETSC_USE_INFO) 807 PetscInt ct = 0, max = 0; 808 #endif 809 810 PetscFunctionBegin; 811 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 812 813 baij->ht_size = (PetscInt)(factor * nz); 814 ht_size = baij->ht_size; 815 816 /* Allocate Memory for Hash Table */ 817 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 818 HD = baij->hd; 819 HT = baij->ht; 820 821 /* Loop Over A */ 822 for (i = 0; i < a->mbs; i++) { 823 for (j = ai[i]; j < ai[i + 1]; j++) { 824 row = i + rstart; 825 col = aj[j] + cstart; 826 827 key = row * Nbs + col + 1; 828 h1 = HASH(ht_size, key, tmp); 829 for (k = 0; k < ht_size; k++) { 830 if (!HT[(h1 + k) % ht_size]) { 831 HT[(h1 + k) % ht_size] = key; 832 HD[(h1 + k) % ht_size] = a->a + j * bs2; 833 break; 834 #if defined(PETSC_USE_INFO) 835 } else { 836 ct++; 837 #endif 838 } 839 } 840 #if defined(PETSC_USE_INFO) 841 if (k > max) max = k; 842 #endif 843 } 844 } 845 /* Loop Over B */ 846 for (i = 0; i < b->mbs; i++) { 847 for (j = bi[i]; j < bi[i + 1]; j++) { 848 row = i + rstart; 849 col = garray[bj[j]]; 850 key = row * Nbs + col + 1; 851 h1 = HASH(ht_size, key, tmp); 852 for (k = 0; k < ht_size; k++) { 853 if (!HT[(h1 + k) % ht_size]) { 854 HT[(h1 + k) % ht_size] = key; 855 HD[(h1 + k) % ht_size] = b->a + j * bs2; 856 break; 857 #if defined(PETSC_USE_INFO) 858 } else { 859 ct++; 860 #endif 861 } 862 } 863 #if defined(PETSC_USE_INFO) 864 if (k > max) max = k; 865 #endif 866 } 867 } 868 869 /* Print Summary */ 870 #if defined(PETSC_USE_INFO) 871 for (i = 0, j = 0; i < ht_size; i++) { 872 if (HT[i]) j++; 873 } 874 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max)); 875 #endif 876 PetscFunctionReturn(PETSC_SUCCESS); 877 } 878 879 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 880 { 881 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 882 PetscInt nstash, reallocs; 883 884 PetscFunctionBegin; 885 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 886 887 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 888 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 889 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 890 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 891 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 892 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 893 PetscFunctionReturn(PETSC_SUCCESS); 894 } 895 896 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 897 { 898 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 900 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 901 PetscInt *row, *col; 902 PetscBool r1, r2, r3, all_assembled; 903 MatScalar *val; 904 PetscMPIInt n; 905 906 PetscFunctionBegin; 907 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 908 if (!baij->donotstash && !mat->nooffprocentries) { 909 while (1) { 910 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 911 if (!flg) break; 912 913 for (i = 0; i < n;) { 914 /* Now identify the consecutive vals belonging to the same row */ 915 for (j = i, rstart = row[j]; j < n; j++) { 916 if (row[j] != rstart) break; 917 } 918 if (j < n) ncols = j - i; 919 else ncols = n - i; 920 /* Now assemble all these values with a single function call */ 921 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 922 i = j; 923 } 924 } 925 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 926 /* Now process the block-stash. Since the values are stashed column-oriented, 927 set the row-oriented flag to column-oriented, and after MatSetValues() 928 restore the original flags */ 929 r1 = baij->roworiented; 930 r2 = a->roworiented; 931 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 932 933 baij->roworiented = PETSC_FALSE; 934 a->roworiented = PETSC_FALSE; 935 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 936 while (1) { 937 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 938 if (!flg) break; 939 940 for (i = 0; i < n;) { 941 /* Now identify the consecutive vals belonging to the same row */ 942 for (j = i, rstart = row[j]; j < n; j++) { 943 if (row[j] != rstart) break; 944 } 945 if (j < n) ncols = j - i; 946 else ncols = n - i; 947 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 948 i = j; 949 } 950 } 951 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 952 953 baij->roworiented = r1; 954 a->roworiented = r2; 955 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 956 } 957 958 PetscCall(MatAssemblyBegin(baij->A, mode)); 959 PetscCall(MatAssemblyEnd(baij->A, mode)); 960 961 /* determine if any process has disassembled, if so we must 962 also disassemble ourselves, in order that we may reassemble. */ 963 /* 964 if nonzero structure of submatrix B cannot change then we know that 965 no process disassembled thus we can skip this stuff 966 */ 967 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 968 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 969 if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 970 } 971 972 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 973 PetscCall(MatAssemblyBegin(baij->B, mode)); 974 PetscCall(MatAssemblyEnd(baij->B, mode)); 975 976 #if defined(PETSC_USE_INFO) 977 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 978 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 979 980 baij->ht_total_ct = 0; 981 baij->ht_insert_ct = 0; 982 } 983 #endif 984 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 985 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 986 987 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 988 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 989 } 990 991 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 992 993 baij->rowvalues = NULL; 994 995 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 996 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 997 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 998 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 999 } 1000 PetscFunctionReturn(PETSC_SUCCESS); 1001 } 1002 1003 #include <petscdraw.h> 1004 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1005 { 1006 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1007 PetscMPIInt rank = baij->rank; 1008 PetscInt bs = mat->rmap->bs; 1009 PetscBool isascii, isdraw; 1010 PetscViewer sviewer; 1011 PetscViewerFormat format; 1012 1013 PetscFunctionBegin; 1014 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1015 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1016 if (isascii) { 1017 PetscCall(PetscViewerGetFormat(viewer, &format)); 1018 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1019 MatInfo info; 1020 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1021 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1022 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1023 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1024 mat->rmap->bs, info.memory)); 1025 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1026 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1027 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1028 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1029 PetscCall(PetscViewerFlush(viewer)); 1030 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1031 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1032 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1033 PetscFunctionReturn(PETSC_SUCCESS); 1034 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1035 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1038 PetscFunctionReturn(PETSC_SUCCESS); 1039 } 1040 } 1041 1042 if (isdraw) { 1043 PetscDraw draw; 1044 PetscBool isnull; 1045 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1046 PetscCall(PetscDrawIsNull(draw, &isnull)); 1047 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1048 } 1049 1050 { 1051 /* assemble the entire matrix onto first processor. */ 1052 Mat A; 1053 Mat_SeqBAIJ *Aloc; 1054 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1055 MatScalar *a; 1056 const char *matname; 1057 1058 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1059 /* Perhaps this should be the type of mat? */ 1060 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1061 if (rank == 0) { 1062 PetscCall(MatSetSizes(A, M, N, M, N)); 1063 } else { 1064 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1065 } 1066 PetscCall(MatSetType(A, MATMPIBAIJ)); 1067 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1068 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1069 1070 /* copy over the A part */ 1071 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1072 ai = Aloc->i; 1073 aj = Aloc->j; 1074 a = Aloc->a; 1075 PetscCall(PetscMalloc1(bs, &rvals)); 1076 1077 for (i = 0; i < mbs; i++) { 1078 rvals[0] = bs * (baij->rstartbs + i); 1079 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1080 for (j = ai[i]; j < ai[i + 1]; j++) { 1081 col = (baij->cstartbs + aj[j]) * bs; 1082 for (k = 0; k < bs; k++) { 1083 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1084 col++; 1085 a += bs; 1086 } 1087 } 1088 } 1089 /* copy over the B part */ 1090 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1091 ai = Aloc->i; 1092 aj = Aloc->j; 1093 a = Aloc->a; 1094 for (i = 0; i < mbs; i++) { 1095 rvals[0] = bs * (baij->rstartbs + i); 1096 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1097 for (j = ai[i]; j < ai[i + 1]; j++) { 1098 col = baij->garray[aj[j]] * bs; 1099 for (k = 0; k < bs; k++) { 1100 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1101 col++; 1102 a += bs; 1103 } 1104 } 1105 } 1106 PetscCall(PetscFree(rvals)); 1107 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1108 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1109 /* 1110 Everyone has to call to draw the matrix since the graphics waits are 1111 synchronized across all processors that share the PetscDraw object 1112 */ 1113 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1114 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1115 if (rank == 0) { 1116 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1117 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1118 } 1119 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1120 PetscCall(MatDestroy(&A)); 1121 } 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1126 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1127 { 1128 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1129 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1130 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1131 const PetscInt *garray = aij->garray; 1132 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1133 PetscCount nz, hnz; 1134 PetscInt *rowlens, *colidxs; 1135 PetscScalar *matvals; 1136 PetscMPIInt rank; 1137 1138 PetscFunctionBegin; 1139 PetscCall(PetscViewerSetUp(viewer)); 1140 1141 M = mat->rmap->N; 1142 N = mat->cmap->N; 1143 m = mat->rmap->n; 1144 rs = mat->rmap->rstart; 1145 cs = mat->cmap->rstart; 1146 bs = mat->rmap->bs; 1147 nz = bs * bs * (A->nz + B->nz); 1148 1149 /* write matrix header */ 1150 header[0] = MAT_FILE_CLASSID; 1151 header[1] = M; 1152 header[2] = N; 1153 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1154 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1155 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1156 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1157 1158 /* fill in and store row lengths */ 1159 PetscCall(PetscMalloc1(m, &rowlens)); 1160 for (cnt = 0, i = 0; i < A->mbs; i++) 1161 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1162 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1163 PetscCall(PetscFree(rowlens)); 1164 1165 /* fill in and store column indices */ 1166 PetscCall(PetscMalloc1(nz, &colidxs)); 1167 for (cnt = 0, i = 0; i < A->mbs; i++) { 1168 for (k = 0; k < bs; k++) { 1169 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1170 if (garray[B->j[jb]] > cs / bs) break; 1171 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1172 } 1173 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1174 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1175 for (; jb < B->i[i + 1]; jb++) 1176 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1177 } 1178 } 1179 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1180 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1181 PetscCall(PetscFree(colidxs)); 1182 1183 /* fill in and store nonzero values */ 1184 PetscCall(PetscMalloc1(nz, &matvals)); 1185 for (cnt = 0, i = 0; i < A->mbs; i++) { 1186 for (k = 0; k < bs; k++) { 1187 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1188 if (garray[B->j[jb]] > cs / bs) break; 1189 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1190 } 1191 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1192 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1193 for (; jb < B->i[i + 1]; jb++) 1194 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1195 } 1196 } 1197 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1198 PetscCall(PetscFree(matvals)); 1199 1200 /* write block size option to the viewer's .info file */ 1201 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1202 PetscFunctionReturn(PETSC_SUCCESS); 1203 } 1204 1205 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1206 { 1207 PetscBool isascii, isdraw, issocket, isbinary; 1208 1209 PetscFunctionBegin; 1210 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1211 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1212 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1213 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1214 if (isascii || isdraw || issocket) { 1215 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1216 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1217 PetscFunctionReturn(PETSC_SUCCESS); 1218 } 1219 1220 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1221 { 1222 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1223 PetscInt nt; 1224 1225 PetscFunctionBegin; 1226 PetscCall(VecGetLocalSize(xx, &nt)); 1227 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1228 PetscCall(VecGetLocalSize(yy, &nt)); 1229 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1230 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1231 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1232 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1233 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1234 PetscFunctionReturn(PETSC_SUCCESS); 1235 } 1236 1237 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1238 { 1239 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1240 1241 PetscFunctionBegin; 1242 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1243 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1244 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1245 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1246 PetscFunctionReturn(PETSC_SUCCESS); 1247 } 1248 1249 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1250 { 1251 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1252 1253 PetscFunctionBegin; 1254 /* do nondiagonal part */ 1255 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1256 /* do local part */ 1257 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1258 /* add partial results together */ 1259 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1260 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1261 PetscFunctionReturn(PETSC_SUCCESS); 1262 } 1263 1264 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1265 { 1266 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1267 1268 PetscFunctionBegin; 1269 /* do nondiagonal part */ 1270 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1271 /* do local part */ 1272 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1273 /* add partial results together */ 1274 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1275 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 /* 1280 This only works correctly for square matrices where the subblock A->A is the 1281 diagonal block 1282 */ 1283 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1284 { 1285 PetscFunctionBegin; 1286 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1287 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 1291 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1292 { 1293 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1294 1295 PetscFunctionBegin; 1296 PetscCall(MatScale(a->A, aa)); 1297 PetscCall(MatScale(a->B, aa)); 1298 PetscFunctionReturn(PETSC_SUCCESS); 1299 } 1300 1301 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1302 { 1303 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1304 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1305 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1306 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1307 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1308 1309 PetscFunctionBegin; 1310 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1311 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1312 mat->getrowactive = PETSC_TRUE; 1313 1314 if (!mat->rowvalues && (idx || v)) { 1315 /* 1316 allocate enough space to hold information from the longest row. 1317 */ 1318 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1319 PetscInt max = 1, mbs = mat->mbs, tmp; 1320 for (i = 0; i < mbs; i++) { 1321 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1322 if (max < tmp) max = tmp; 1323 } 1324 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1325 } 1326 lrow = row - brstart; 1327 1328 pvA = &vworkA; 1329 pcA = &cworkA; 1330 pvB = &vworkB; 1331 pcB = &cworkB; 1332 if (!v) { 1333 pvA = NULL; 1334 pvB = NULL; 1335 } 1336 if (!idx) { 1337 pcA = NULL; 1338 if (!v) pcB = NULL; 1339 } 1340 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1341 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1342 nztot = nzA + nzB; 1343 1344 cmap = mat->garray; 1345 if (v || idx) { 1346 if (nztot) { 1347 /* Sort by increasing column numbers, assuming A and B already sorted */ 1348 PetscInt imark = -1; 1349 if (v) { 1350 *v = v_p = mat->rowvalues; 1351 for (i = 0; i < nzB; i++) { 1352 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1353 else break; 1354 } 1355 imark = i; 1356 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1357 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1358 } 1359 if (idx) { 1360 *idx = idx_p = mat->rowindices; 1361 if (imark > -1) { 1362 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1363 } else { 1364 for (i = 0; i < nzB; i++) { 1365 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1366 else break; 1367 } 1368 imark = i; 1369 } 1370 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1371 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1372 } 1373 } else { 1374 if (idx) *idx = NULL; 1375 if (v) *v = NULL; 1376 } 1377 } 1378 *nz = nztot; 1379 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1380 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1381 PetscFunctionReturn(PETSC_SUCCESS); 1382 } 1383 1384 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1385 { 1386 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1387 1388 PetscFunctionBegin; 1389 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1390 baij->getrowactive = PETSC_FALSE; 1391 PetscFunctionReturn(PETSC_SUCCESS); 1392 } 1393 1394 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1395 { 1396 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1397 1398 PetscFunctionBegin; 1399 PetscCall(MatZeroEntries(l->A)); 1400 PetscCall(MatZeroEntries(l->B)); 1401 PetscFunctionReturn(PETSC_SUCCESS); 1402 } 1403 1404 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1405 { 1406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1407 Mat A = a->A, B = a->B; 1408 PetscLogDouble isend[5], irecv[5]; 1409 1410 PetscFunctionBegin; 1411 info->block_size = (PetscReal)matin->rmap->bs; 1412 1413 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1414 1415 isend[0] = info->nz_used; 1416 isend[1] = info->nz_allocated; 1417 isend[2] = info->nz_unneeded; 1418 isend[3] = info->memory; 1419 isend[4] = info->mallocs; 1420 1421 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1422 1423 isend[0] += info->nz_used; 1424 isend[1] += info->nz_allocated; 1425 isend[2] += info->nz_unneeded; 1426 isend[3] += info->memory; 1427 isend[4] += info->mallocs; 1428 1429 if (flag == MAT_LOCAL) { 1430 info->nz_used = isend[0]; 1431 info->nz_allocated = isend[1]; 1432 info->nz_unneeded = isend[2]; 1433 info->memory = isend[3]; 1434 info->mallocs = isend[4]; 1435 } else if (flag == MAT_GLOBAL_MAX) { 1436 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1437 1438 info->nz_used = irecv[0]; 1439 info->nz_allocated = irecv[1]; 1440 info->nz_unneeded = irecv[2]; 1441 info->memory = irecv[3]; 1442 info->mallocs = irecv[4]; 1443 } else if (flag == MAT_GLOBAL_SUM) { 1444 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1445 1446 info->nz_used = irecv[0]; 1447 info->nz_allocated = irecv[1]; 1448 info->nz_unneeded = irecv[2]; 1449 info->memory = irecv[3]; 1450 info->mallocs = irecv[4]; 1451 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1452 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1453 info->fill_ratio_needed = 0; 1454 info->factor_mallocs = 0; 1455 PetscFunctionReturn(PETSC_SUCCESS); 1456 } 1457 1458 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1459 { 1460 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1461 1462 PetscFunctionBegin; 1463 switch (op) { 1464 case MAT_NEW_NONZERO_LOCATIONS: 1465 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1466 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1467 case MAT_KEEP_NONZERO_PATTERN: 1468 case MAT_NEW_NONZERO_LOCATION_ERR: 1469 MatCheckPreallocated(A, 1); 1470 PetscCall(MatSetOption(a->A, op, flg)); 1471 PetscCall(MatSetOption(a->B, op, flg)); 1472 break; 1473 case MAT_ROW_ORIENTED: 1474 MatCheckPreallocated(A, 1); 1475 a->roworiented = flg; 1476 1477 PetscCall(MatSetOption(a->A, op, flg)); 1478 PetscCall(MatSetOption(a->B, op, flg)); 1479 break; 1480 case MAT_IGNORE_OFF_PROC_ENTRIES: 1481 a->donotstash = flg; 1482 break; 1483 case MAT_USE_HASH_TABLE: 1484 a->ht_flag = flg; 1485 a->ht_fact = 1.39; 1486 break; 1487 case MAT_SYMMETRIC: 1488 case MAT_STRUCTURALLY_SYMMETRIC: 1489 case MAT_HERMITIAN: 1490 case MAT_SYMMETRY_ETERNAL: 1491 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1492 case MAT_SPD_ETERNAL: 1493 /* if the diagonal matrix is square it inherits some of the properties above */ 1494 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1495 break; 1496 default: 1497 break; 1498 } 1499 PetscFunctionReturn(PETSC_SUCCESS); 1500 } 1501 1502 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1503 { 1504 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1505 Mat_SeqBAIJ *Aloc; 1506 Mat B; 1507 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1508 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1509 MatScalar *a; 1510 1511 PetscFunctionBegin; 1512 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1513 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1514 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1515 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1516 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1517 /* Do not know preallocation information, but must set block size */ 1518 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1519 } else { 1520 B = *matout; 1521 } 1522 1523 /* copy over the A part */ 1524 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1525 ai = Aloc->i; 1526 aj = Aloc->j; 1527 a = Aloc->a; 1528 PetscCall(PetscMalloc1(bs, &rvals)); 1529 1530 for (i = 0; i < mbs; i++) { 1531 rvals[0] = bs * (baij->rstartbs + i); 1532 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1533 for (j = ai[i]; j < ai[i + 1]; j++) { 1534 col = (baij->cstartbs + aj[j]) * bs; 1535 for (k = 0; k < bs; k++) { 1536 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1537 1538 col++; 1539 a += bs; 1540 } 1541 } 1542 } 1543 /* copy over the B part */ 1544 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1545 ai = Aloc->i; 1546 aj = Aloc->j; 1547 a = Aloc->a; 1548 for (i = 0; i < mbs; i++) { 1549 rvals[0] = bs * (baij->rstartbs + i); 1550 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 col = baij->garray[aj[j]] * bs; 1553 for (k = 0; k < bs; k++) { 1554 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1555 col++; 1556 a += bs; 1557 } 1558 } 1559 } 1560 PetscCall(PetscFree(rvals)); 1561 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1562 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1563 1564 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1565 else PetscCall(MatHeaderMerge(A, &B)); 1566 PetscFunctionReturn(PETSC_SUCCESS); 1567 } 1568 1569 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1570 { 1571 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1572 Mat a = baij->A, b = baij->B; 1573 PetscInt s1, s2, s3; 1574 1575 PetscFunctionBegin; 1576 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1577 if (rr) { 1578 PetscCall(VecGetLocalSize(rr, &s1)); 1579 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1580 /* Overlap communication with computation. */ 1581 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1582 } 1583 if (ll) { 1584 PetscCall(VecGetLocalSize(ll, &s1)); 1585 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1586 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1587 } 1588 /* scale the diagonal block */ 1589 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1590 1591 if (rr) { 1592 /* Do a scatter end and then right scale the off-diagonal block */ 1593 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1594 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1595 } 1596 PetscFunctionReturn(PETSC_SUCCESS); 1597 } 1598 1599 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1600 { 1601 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1602 PetscInt *lrows; 1603 PetscInt r, len; 1604 PetscBool cong; 1605 1606 PetscFunctionBegin; 1607 /* get locally owned rows */ 1608 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1609 /* fix right-hand side if needed */ 1610 if (x && b) { 1611 const PetscScalar *xx; 1612 PetscScalar *bb; 1613 1614 PetscCall(VecGetArrayRead(x, &xx)); 1615 PetscCall(VecGetArray(b, &bb)); 1616 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1617 PetscCall(VecRestoreArrayRead(x, &xx)); 1618 PetscCall(VecRestoreArray(b, &bb)); 1619 } 1620 1621 /* actually zap the local rows */ 1622 /* 1623 Zero the required rows. If the "diagonal block" of the matrix 1624 is square and the user wishes to set the diagonal we use separate 1625 code so that MatSetValues() is not called for each diagonal allocating 1626 new memory, thus calling lots of mallocs and slowing things down. 1627 1628 */ 1629 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1630 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1631 PetscCall(MatHasCongruentLayouts(A, &cong)); 1632 if ((diag != 0.0) && cong) { 1633 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1634 } else if (diag != 0.0) { 1635 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1636 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1637 for (r = 0; r < len; ++r) { 1638 const PetscInt row = lrows[r] + A->rmap->rstart; 1639 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1640 } 1641 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1642 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1643 } else { 1644 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1645 } 1646 PetscCall(PetscFree(lrows)); 1647 1648 /* only change matrix nonzero state if pattern was allowed to be changed */ 1649 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1650 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1651 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1652 } 1653 PetscFunctionReturn(PETSC_SUCCESS); 1654 } 1655 1656 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1657 { 1658 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1659 PetscMPIInt n, p = 0; 1660 PetscInt i, j, k, r, len = 0, row, col, count; 1661 PetscInt *lrows, *owners = A->rmap->range; 1662 PetscSFNode *rrows; 1663 PetscSF sf; 1664 const PetscScalar *xx; 1665 PetscScalar *bb, *mask; 1666 Vec xmask, lmask; 1667 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1668 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1669 PetscScalar *aa; 1670 1671 PetscFunctionBegin; 1672 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1673 /* Create SF where leaves are input rows and roots are owned rows */ 1674 PetscCall(PetscMalloc1(n, &lrows)); 1675 for (r = 0; r < n; ++r) lrows[r] = -1; 1676 PetscCall(PetscMalloc1(N, &rrows)); 1677 for (r = 0; r < N; ++r) { 1678 const PetscInt idx = rows[r]; 1679 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1680 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1681 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1682 } 1683 rrows[r].rank = p; 1684 rrows[r].index = rows[r] - owners[p]; 1685 } 1686 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1687 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1688 /* Collect flags for rows to be zeroed */ 1689 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1690 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1691 PetscCall(PetscSFDestroy(&sf)); 1692 /* Compress and put in row numbers */ 1693 for (r = 0; r < n; ++r) 1694 if (lrows[r] >= 0) lrows[len++] = r; 1695 /* zero diagonal part of matrix */ 1696 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1697 /* handle off-diagonal part of matrix */ 1698 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1699 PetscCall(VecDuplicate(l->lvec, &lmask)); 1700 PetscCall(VecGetArray(xmask, &bb)); 1701 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1702 PetscCall(VecRestoreArray(xmask, &bb)); 1703 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1704 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1705 PetscCall(VecDestroy(&xmask)); 1706 if (x) { 1707 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1708 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1709 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1710 PetscCall(VecGetArray(b, &bb)); 1711 } 1712 PetscCall(VecGetArray(lmask, &mask)); 1713 /* remove zeroed rows of off-diagonal matrix */ 1714 for (i = 0; i < len; ++i) { 1715 row = lrows[i]; 1716 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1717 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1718 for (k = 0; k < count; ++k) { 1719 aa[0] = 0.0; 1720 aa += bs; 1721 } 1722 } 1723 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1724 for (i = 0; i < l->B->rmap->N; ++i) { 1725 row = i / bs; 1726 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1727 for (k = 0; k < bs; ++k) { 1728 col = bs * baij->j[j] + k; 1729 if (PetscAbsScalar(mask[col])) { 1730 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1731 if (x) bb[i] -= aa[0] * xx[col]; 1732 aa[0] = 0.0; 1733 } 1734 } 1735 } 1736 } 1737 if (x) { 1738 PetscCall(VecRestoreArray(b, &bb)); 1739 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1740 } 1741 PetscCall(VecRestoreArray(lmask, &mask)); 1742 PetscCall(VecDestroy(&lmask)); 1743 PetscCall(PetscFree(lrows)); 1744 1745 /* only change matrix nonzero state if pattern was allowed to be changed */ 1746 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1747 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1748 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1749 } 1750 PetscFunctionReturn(PETSC_SUCCESS); 1751 } 1752 1753 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1754 { 1755 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1756 1757 PetscFunctionBegin; 1758 PetscCall(MatSetUnfactored(a->A)); 1759 PetscFunctionReturn(PETSC_SUCCESS); 1760 } 1761 1762 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1763 1764 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1765 { 1766 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1767 Mat a, b, c, d; 1768 PetscBool flg; 1769 1770 PetscFunctionBegin; 1771 a = matA->A; 1772 b = matA->B; 1773 c = matB->A; 1774 d = matB->B; 1775 1776 PetscCall(MatEqual(a, c, &flg)); 1777 if (flg) PetscCall(MatEqual(b, d, &flg)); 1778 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1779 PetscFunctionReturn(PETSC_SUCCESS); 1780 } 1781 1782 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1783 { 1784 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1785 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1786 1787 PetscFunctionBegin; 1788 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1789 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1790 PetscCall(MatCopy_Basic(A, B, str)); 1791 } else { 1792 PetscCall(MatCopy(a->A, b->A, str)); 1793 PetscCall(MatCopy(a->B, b->B, str)); 1794 } 1795 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1796 PetscFunctionReturn(PETSC_SUCCESS); 1797 } 1798 1799 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1800 { 1801 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1802 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1803 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1804 1805 PetscFunctionBegin; 1806 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1807 PetscFunctionReturn(PETSC_SUCCESS); 1808 } 1809 1810 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1811 { 1812 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1813 PetscBLASInt bnz, one = 1; 1814 Mat_SeqBAIJ *x, *y; 1815 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1816 1817 PetscFunctionBegin; 1818 if (str == SAME_NONZERO_PATTERN) { 1819 PetscScalar alpha = a; 1820 x = (Mat_SeqBAIJ *)xx->A->data; 1821 y = (Mat_SeqBAIJ *)yy->A->data; 1822 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1823 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1824 x = (Mat_SeqBAIJ *)xx->B->data; 1825 y = (Mat_SeqBAIJ *)yy->B->data; 1826 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1827 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1828 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1829 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1830 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1831 } else { 1832 Mat B; 1833 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1834 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1835 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1836 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1837 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1838 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1839 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1840 PetscCall(MatSetType(B, MATMPIBAIJ)); 1841 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1842 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1843 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1844 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1845 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1846 PetscCall(MatHeaderMerge(Y, &B)); 1847 PetscCall(PetscFree(nnz_d)); 1848 PetscCall(PetscFree(nnz_o)); 1849 } 1850 PetscFunctionReturn(PETSC_SUCCESS); 1851 } 1852 1853 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1854 { 1855 PetscFunctionBegin; 1856 if (PetscDefined(USE_COMPLEX)) { 1857 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1858 1859 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1860 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1861 } 1862 PetscFunctionReturn(PETSC_SUCCESS); 1863 } 1864 1865 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1866 { 1867 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1868 1869 PetscFunctionBegin; 1870 PetscCall(MatRealPart(a->A)); 1871 PetscCall(MatRealPart(a->B)); 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 1875 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1876 { 1877 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1878 1879 PetscFunctionBegin; 1880 PetscCall(MatImaginaryPart(a->A)); 1881 PetscCall(MatImaginaryPart(a->B)); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1886 { 1887 IS iscol_local; 1888 PetscInt csize; 1889 1890 PetscFunctionBegin; 1891 PetscCall(ISGetLocalSize(iscol, &csize)); 1892 if (call == MAT_REUSE_MATRIX) { 1893 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1894 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1895 } else { 1896 PetscCall(ISAllGather(iscol, &iscol_local)); 1897 } 1898 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1899 if (call == MAT_INITIAL_MATRIX) { 1900 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1901 PetscCall(ISDestroy(&iscol_local)); 1902 } 1903 PetscFunctionReturn(PETSC_SUCCESS); 1904 } 1905 1906 /* 1907 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1908 in local and then by concatenating the local matrices the end result. 1909 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1910 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1911 */ 1912 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1913 { 1914 PetscMPIInt rank, size; 1915 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1916 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1917 Mat M, Mreuse; 1918 MatScalar *vwork, *aa; 1919 MPI_Comm comm; 1920 IS isrow_new, iscol_new; 1921 Mat_SeqBAIJ *aij; 1922 1923 PetscFunctionBegin; 1924 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1925 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1926 PetscCallMPI(MPI_Comm_size(comm, &size)); 1927 /* The compression and expansion should be avoided. Doesn't point 1928 out errors, might change the indices, hence buggey */ 1929 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1930 if (isrow == iscol) { 1931 iscol_new = isrow_new; 1932 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1933 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1934 1935 if (call == MAT_REUSE_MATRIX) { 1936 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1937 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1938 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1939 } else { 1940 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1941 } 1942 PetscCall(ISDestroy(&isrow_new)); 1943 PetscCall(ISDestroy(&iscol_new)); 1944 /* 1945 m - number of local rows 1946 n - number of columns (same on all processors) 1947 rstart - first row in new global matrix generated 1948 */ 1949 PetscCall(MatGetBlockSize(mat, &bs)); 1950 PetscCall(MatGetSize(Mreuse, &m, &n)); 1951 m = m / bs; 1952 n = n / bs; 1953 1954 if (call == MAT_INITIAL_MATRIX) { 1955 aij = (Mat_SeqBAIJ *)Mreuse->data; 1956 ii = aij->i; 1957 jj = aij->j; 1958 1959 /* 1960 Determine the number of non-zeros in the diagonal and off-diagonal 1961 portions of the matrix in order to do correct preallocation 1962 */ 1963 1964 /* first get start and end of "diagonal" columns */ 1965 if (csize == PETSC_DECIDE) { 1966 PetscCall(ISGetSize(isrow, &mglobal)); 1967 if (mglobal == n * bs) { /* square matrix */ 1968 nlocal = m; 1969 } else { 1970 nlocal = n / size + ((n % size) > rank); 1971 } 1972 } else { 1973 nlocal = csize / bs; 1974 } 1975 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1976 rstart = rend - nlocal; 1977 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1978 1979 /* next, compute all the lengths */ 1980 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1981 for (i = 0; i < m; i++) { 1982 jend = ii[i + 1] - ii[i]; 1983 olen = 0; 1984 dlen = 0; 1985 for (j = 0; j < jend; j++) { 1986 if (*jj < rstart || *jj >= rend) olen++; 1987 else dlen++; 1988 jj++; 1989 } 1990 olens[i] = olen; 1991 dlens[i] = dlen; 1992 } 1993 PetscCall(MatCreate(comm, &M)); 1994 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1995 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 1996 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1997 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1998 PetscCall(PetscFree2(dlens, olens)); 1999 } else { 2000 PetscInt ml, nl; 2001 2002 M = *newmat; 2003 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2004 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2005 PetscCall(MatZeroEntries(M)); 2006 /* 2007 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2008 rather than the slower MatSetValues(). 2009 */ 2010 M->was_assembled = PETSC_TRUE; 2011 M->assembled = PETSC_FALSE; 2012 } 2013 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2014 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2015 aij = (Mat_SeqBAIJ *)Mreuse->data; 2016 ii = aij->i; 2017 jj = aij->j; 2018 aa = aij->a; 2019 for (i = 0; i < m; i++) { 2020 row = rstart / bs + i; 2021 nz = ii[i + 1] - ii[i]; 2022 cwork = jj; 2023 jj = PetscSafePointerPlusOffset(jj, nz); 2024 vwork = aa; 2025 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2026 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2027 } 2028 2029 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2030 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2031 *newmat = M; 2032 2033 /* save submatrix used in processor for next request */ 2034 if (call == MAT_INITIAL_MATRIX) { 2035 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2036 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2037 } 2038 PetscFunctionReturn(PETSC_SUCCESS); 2039 } 2040 2041 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2042 { 2043 MPI_Comm comm, pcomm; 2044 PetscInt clocal_size, nrows; 2045 const PetscInt *rows; 2046 PetscMPIInt size; 2047 IS crowp, lcolp; 2048 2049 PetscFunctionBegin; 2050 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2051 /* make a collective version of 'rowp' */ 2052 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2053 if (pcomm == comm) { 2054 crowp = rowp; 2055 } else { 2056 PetscCall(ISGetSize(rowp, &nrows)); 2057 PetscCall(ISGetIndices(rowp, &rows)); 2058 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2059 PetscCall(ISRestoreIndices(rowp, &rows)); 2060 } 2061 PetscCall(ISSetPermutation(crowp)); 2062 /* make a local version of 'colp' */ 2063 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2064 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2065 if (size == 1) { 2066 lcolp = colp; 2067 } else { 2068 PetscCall(ISAllGather(colp, &lcolp)); 2069 } 2070 PetscCall(ISSetPermutation(lcolp)); 2071 /* now we just get the submatrix */ 2072 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2073 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2074 /* clean up */ 2075 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2076 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2081 { 2082 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2083 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2084 2085 PetscFunctionBegin; 2086 if (nghosts) *nghosts = B->nbs; 2087 if (ghosts) *ghosts = baij->garray; 2088 PetscFunctionReturn(PETSC_SUCCESS); 2089 } 2090 2091 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2092 { 2093 Mat B; 2094 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2095 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2096 Mat_SeqAIJ *b; 2097 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2098 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2099 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2100 2101 PetscFunctionBegin; 2102 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2103 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2104 2105 /* Tell every processor the number of nonzeros per row */ 2106 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2107 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2108 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2109 displs = recvcounts + size; 2110 for (i = 0; i < size; i++) { 2111 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2112 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2113 } 2114 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2115 /* Create the sequential matrix of the same type as the local block diagonal */ 2116 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2117 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2118 PetscCall(MatSetType(B, MATSEQAIJ)); 2119 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2120 b = (Mat_SeqAIJ *)B->data; 2121 2122 /* Copy my part of matrix column indices over */ 2123 sendcount = ad->nz + bd->nz; 2124 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2125 a_jsendbuf = ad->j; 2126 b_jsendbuf = bd->j; 2127 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2128 cnt = 0; 2129 for (i = 0; i < n; i++) { 2130 /* put in lower diagonal portion */ 2131 m = bd->i[i + 1] - bd->i[i]; 2132 while (m > 0) { 2133 /* is it above diagonal (in bd (compressed) numbering) */ 2134 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2135 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2136 m--; 2137 } 2138 2139 /* put in diagonal portion */ 2140 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2141 2142 /* put in upper diagonal portion */ 2143 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2144 } 2145 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2146 2147 /* Gather all column indices to all processors */ 2148 for (i = 0; i < size; i++) { 2149 recvcounts[i] = 0; 2150 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2151 } 2152 displs[0] = 0; 2153 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2154 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2155 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2156 /* set the b->ilen (length of each row) values */ 2157 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2158 /* set the b->i indices */ 2159 b->i[0] = 0; 2160 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2161 PetscCall(PetscFree(lens)); 2162 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2163 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2164 PetscCall(PetscFree(recvcounts)); 2165 2166 PetscCall(MatPropagateSymmetryOptions(A, B)); 2167 *newmat = B; 2168 PetscFunctionReturn(PETSC_SUCCESS); 2169 } 2170 2171 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2172 { 2173 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2174 Vec bb1 = NULL; 2175 2176 PetscFunctionBegin; 2177 if (flag == SOR_APPLY_UPPER) { 2178 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2179 PetscFunctionReturn(PETSC_SUCCESS); 2180 } 2181 2182 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2183 2184 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2185 if (flag & SOR_ZERO_INITIAL_GUESS) { 2186 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2187 its--; 2188 } 2189 2190 while (its--) { 2191 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2192 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2193 2194 /* update rhs: bb1 = bb - B*x */ 2195 PetscCall(VecScale(mat->lvec, -1.0)); 2196 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2197 2198 /* local sweep */ 2199 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2200 } 2201 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2202 if (flag & SOR_ZERO_INITIAL_GUESS) { 2203 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2204 its--; 2205 } 2206 while (its--) { 2207 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2208 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2209 2210 /* update rhs: bb1 = bb - B*x */ 2211 PetscCall(VecScale(mat->lvec, -1.0)); 2212 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2213 2214 /* local sweep */ 2215 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2216 } 2217 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2218 if (flag & SOR_ZERO_INITIAL_GUESS) { 2219 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2220 its--; 2221 } 2222 while (its--) { 2223 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2224 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2225 2226 /* update rhs: bb1 = bb - B*x */ 2227 PetscCall(VecScale(mat->lvec, -1.0)); 2228 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2229 2230 /* local sweep */ 2231 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2232 } 2233 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2234 2235 PetscCall(VecDestroy(&bb1)); 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2240 { 2241 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2242 PetscInt m, N, i, *garray = aij->garray; 2243 PetscInt ib, jb, bs = A->rmap->bs; 2244 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2245 MatScalar *a_val = a_aij->a; 2246 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2247 MatScalar *b_val = b_aij->a; 2248 PetscReal *work; 2249 2250 PetscFunctionBegin; 2251 PetscCall(MatGetSize(A, &m, &N)); 2252 PetscCall(PetscCalloc1(N, &work)); 2253 if (type == NORM_2) { 2254 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2255 for (jb = 0; jb < bs; jb++) { 2256 for (ib = 0; ib < bs; ib++) { 2257 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2258 a_val++; 2259 } 2260 } 2261 } 2262 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2263 for (jb = 0; jb < bs; jb++) { 2264 for (ib = 0; ib < bs; ib++) { 2265 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2266 b_val++; 2267 } 2268 } 2269 } 2270 } else if (type == NORM_1) { 2271 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2272 for (jb = 0; jb < bs; jb++) { 2273 for (ib = 0; ib < bs; ib++) { 2274 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2275 a_val++; 2276 } 2277 } 2278 } 2279 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2280 for (jb = 0; jb < bs; jb++) { 2281 for (ib = 0; ib < bs; ib++) { 2282 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2283 b_val++; 2284 } 2285 } 2286 } 2287 } else if (type == NORM_INFINITY) { 2288 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2289 for (jb = 0; jb < bs; jb++) { 2290 for (ib = 0; ib < bs; ib++) { 2291 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2292 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2293 a_val++; 2294 } 2295 } 2296 } 2297 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2298 for (jb = 0; jb < bs; jb++) { 2299 for (ib = 0; ib < bs; ib++) { 2300 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2301 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2302 b_val++; 2303 } 2304 } 2305 } 2306 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2307 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2308 for (jb = 0; jb < bs; jb++) { 2309 for (ib = 0; ib < bs; ib++) { 2310 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2311 a_val++; 2312 } 2313 } 2314 } 2315 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2316 for (jb = 0; jb < bs; jb++) { 2317 for (ib = 0; ib < bs; ib++) { 2318 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2319 b_val++; 2320 } 2321 } 2322 } 2323 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2324 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2325 for (jb = 0; jb < bs; jb++) { 2326 for (ib = 0; ib < bs; ib++) { 2327 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2328 a_val++; 2329 } 2330 } 2331 } 2332 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2333 for (jb = 0; jb < bs; jb++) { 2334 for (ib = 0; ib < bs; ib++) { 2335 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2336 b_val++; 2337 } 2338 } 2339 } 2340 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2341 if (type == NORM_INFINITY) { 2342 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2343 } else { 2344 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2345 } 2346 PetscCall(PetscFree(work)); 2347 if (type == NORM_2) { 2348 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2349 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2350 for (i = 0; i < N; i++) reductions[i] /= m; 2351 } 2352 PetscFunctionReturn(PETSC_SUCCESS); 2353 } 2354 2355 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2356 { 2357 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2358 2359 PetscFunctionBegin; 2360 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2361 A->factorerrortype = a->A->factorerrortype; 2362 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2363 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2364 PetscFunctionReturn(PETSC_SUCCESS); 2365 } 2366 2367 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2368 { 2369 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2370 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2371 2372 PetscFunctionBegin; 2373 if (!Y->preallocated) { 2374 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2375 } else if (!aij->nz) { 2376 PetscInt nonew = aij->nonew; 2377 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2378 aij->nonew = nonew; 2379 } 2380 PetscCall(MatShift_Basic(Y, a)); 2381 PetscFunctionReturn(PETSC_SUCCESS); 2382 } 2383 2384 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2385 { 2386 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2387 2388 PetscFunctionBegin; 2389 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2390 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2391 if (d) { 2392 PetscInt rstart; 2393 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2394 *d += rstart / A->rmap->bs; 2395 } 2396 PetscFunctionReturn(PETSC_SUCCESS); 2397 } 2398 2399 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2400 { 2401 PetscFunctionBegin; 2402 *a = ((Mat_MPIBAIJ *)A->data)->A; 2403 PetscFunctionReturn(PETSC_SUCCESS); 2404 } 2405 2406 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2407 { 2408 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2409 2410 PetscFunctionBegin; 2411 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2412 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2413 PetscFunctionReturn(PETSC_SUCCESS); 2414 } 2415 2416 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2417 MatGetRow_MPIBAIJ, 2418 MatRestoreRow_MPIBAIJ, 2419 MatMult_MPIBAIJ, 2420 /* 4*/ MatMultAdd_MPIBAIJ, 2421 MatMultTranspose_MPIBAIJ, 2422 MatMultTransposeAdd_MPIBAIJ, 2423 NULL, 2424 NULL, 2425 NULL, 2426 /*10*/ NULL, 2427 NULL, 2428 NULL, 2429 MatSOR_MPIBAIJ, 2430 MatTranspose_MPIBAIJ, 2431 /*15*/ MatGetInfo_MPIBAIJ, 2432 MatEqual_MPIBAIJ, 2433 MatGetDiagonal_MPIBAIJ, 2434 MatDiagonalScale_MPIBAIJ, 2435 MatNorm_MPIBAIJ, 2436 /*20*/ MatAssemblyBegin_MPIBAIJ, 2437 MatAssemblyEnd_MPIBAIJ, 2438 MatSetOption_MPIBAIJ, 2439 MatZeroEntries_MPIBAIJ, 2440 /*24*/ MatZeroRows_MPIBAIJ, 2441 NULL, 2442 NULL, 2443 NULL, 2444 NULL, 2445 /*29*/ MatSetUp_MPI_Hash, 2446 NULL, 2447 NULL, 2448 MatGetDiagonalBlock_MPIBAIJ, 2449 NULL, 2450 /*34*/ MatDuplicate_MPIBAIJ, 2451 NULL, 2452 NULL, 2453 NULL, 2454 NULL, 2455 /*39*/ MatAXPY_MPIBAIJ, 2456 MatCreateSubMatrices_MPIBAIJ, 2457 MatIncreaseOverlap_MPIBAIJ, 2458 MatGetValues_MPIBAIJ, 2459 MatCopy_MPIBAIJ, 2460 /*44*/ NULL, 2461 MatScale_MPIBAIJ, 2462 MatShift_MPIBAIJ, 2463 NULL, 2464 MatZeroRowsColumns_MPIBAIJ, 2465 /*49*/ NULL, 2466 NULL, 2467 NULL, 2468 NULL, 2469 NULL, 2470 /*54*/ MatFDColoringCreate_MPIXAIJ, 2471 NULL, 2472 MatSetUnfactored_MPIBAIJ, 2473 MatPermute_MPIBAIJ, 2474 MatSetValuesBlocked_MPIBAIJ, 2475 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2476 MatDestroy_MPIBAIJ, 2477 MatView_MPIBAIJ, 2478 NULL, 2479 NULL, 2480 /*64*/ NULL, 2481 NULL, 2482 NULL, 2483 NULL, 2484 MatGetRowMaxAbs_MPIBAIJ, 2485 /*69*/ NULL, 2486 NULL, 2487 NULL, 2488 MatFDColoringApply_BAIJ, 2489 NULL, 2490 /*74*/ NULL, 2491 NULL, 2492 NULL, 2493 NULL, 2494 MatLoad_MPIBAIJ, 2495 /*79*/ NULL, 2496 NULL, 2497 NULL, 2498 NULL, 2499 NULL, 2500 /*84*/ NULL, 2501 NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 /*89*/ NULL, 2506 NULL, 2507 NULL, 2508 NULL, 2509 MatConjugate_MPIBAIJ, 2510 /*94*/ NULL, 2511 NULL, 2512 MatRealPart_MPIBAIJ, 2513 MatImaginaryPart_MPIBAIJ, 2514 NULL, 2515 /*99*/ NULL, 2516 NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 /*104*/ MatMissingDiagonal_MPIBAIJ, 2521 MatGetSeqNonzeroStructure_MPIBAIJ, 2522 NULL, 2523 MatGetGhosts_MPIBAIJ, 2524 NULL, 2525 /*109*/ NULL, 2526 NULL, 2527 NULL, 2528 NULL, 2529 NULL, 2530 /*114*/ MatGetMultiProcBlock_MPIBAIJ, 2531 NULL, 2532 MatGetColumnReductions_MPIBAIJ, 2533 MatInvertBlockDiagonal_MPIBAIJ, 2534 NULL, 2535 /*119*/ NULL, 2536 NULL, 2537 NULL, 2538 NULL, 2539 NULL, 2540 /*124*/ NULL, 2541 NULL, 2542 NULL, 2543 MatSetBlockSizes_Default, 2544 NULL, 2545 /*129*/ MatFDColoringSetUp_MPIXAIJ, 2546 NULL, 2547 MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2548 NULL, 2549 NULL, 2550 /*134*/ NULL, 2551 NULL, 2552 NULL, 2553 MatEliminateZeros_MPIBAIJ, 2554 MatGetRowSumAbs_MPIBAIJ, 2555 /*139*/ NULL, 2556 NULL, 2557 NULL, 2558 MatCopyHashToXAIJ_MPI_Hash, 2559 NULL}; 2560 2561 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2562 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2563 2564 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2565 { 2566 PetscInt m, rstart, cstart, cend; 2567 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2568 const PetscInt *JJ = NULL; 2569 PetscScalar *values = NULL; 2570 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2571 PetscBool nooffprocentries; 2572 2573 PetscFunctionBegin; 2574 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2575 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2576 PetscCall(PetscLayoutSetUp(B->rmap)); 2577 PetscCall(PetscLayoutSetUp(B->cmap)); 2578 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2579 m = B->rmap->n / bs; 2580 rstart = B->rmap->rstart / bs; 2581 cstart = B->cmap->rstart / bs; 2582 cend = B->cmap->rend / bs; 2583 2584 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2585 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2586 for (i = 0; i < m; i++) { 2587 nz = ii[i + 1] - ii[i]; 2588 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2589 nz_max = PetscMax(nz_max, nz); 2590 dlen = 0; 2591 olen = 0; 2592 JJ = jj + ii[i]; 2593 for (j = 0; j < nz; j++) { 2594 if (*JJ < cstart || *JJ >= cend) olen++; 2595 else dlen++; 2596 JJ++; 2597 } 2598 d_nnz[i] = dlen; 2599 o_nnz[i] = olen; 2600 } 2601 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2602 PetscCall(PetscFree2(d_nnz, o_nnz)); 2603 2604 values = (PetscScalar *)V; 2605 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2606 for (i = 0; i < m; i++) { 2607 PetscInt row = i + rstart; 2608 PetscInt ncols = ii[i + 1] - ii[i]; 2609 const PetscInt *icols = jj + ii[i]; 2610 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2611 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2612 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2613 } else { /* block ordering does not match so we can only insert one block at a time. */ 2614 PetscInt j; 2615 for (j = 0; j < ncols; j++) { 2616 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2617 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2618 } 2619 } 2620 } 2621 2622 if (!V) PetscCall(PetscFree(values)); 2623 nooffprocentries = B->nooffprocentries; 2624 B->nooffprocentries = PETSC_TRUE; 2625 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2626 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2627 B->nooffprocentries = nooffprocentries; 2628 2629 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2630 PetscFunctionReturn(PETSC_SUCCESS); 2631 } 2632 2633 /*@C 2634 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2635 2636 Collective 2637 2638 Input Parameters: 2639 + B - the matrix 2640 . bs - the block size 2641 . i - the indices into `j` for the start of each local row (starts with zero) 2642 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2643 - v - optional values in the matrix, use `NULL` if not provided 2644 2645 Level: advanced 2646 2647 Notes: 2648 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2649 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2650 called this routine. 2651 2652 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2653 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2654 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2655 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2656 block column and the second index is over columns within a block. 2657 2658 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2659 2660 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2661 @*/ 2662 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2663 { 2664 PetscFunctionBegin; 2665 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2666 PetscValidType(B, 1); 2667 PetscValidLogicalCollectiveInt(B, bs, 2); 2668 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2673 { 2674 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2675 PetscInt i; 2676 PetscMPIInt size; 2677 2678 PetscFunctionBegin; 2679 if (B->hash_active) { 2680 B->ops[0] = b->cops; 2681 B->hash_active = PETSC_FALSE; 2682 } 2683 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2684 PetscCall(MatSetBlockSize(B, bs)); 2685 PetscCall(PetscLayoutSetUp(B->rmap)); 2686 PetscCall(PetscLayoutSetUp(B->cmap)); 2687 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2688 2689 if (d_nnz) { 2690 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2691 } 2692 if (o_nnz) { 2693 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2694 } 2695 2696 b->bs2 = bs * bs; 2697 b->mbs = B->rmap->n / bs; 2698 b->nbs = B->cmap->n / bs; 2699 b->Mbs = B->rmap->N / bs; 2700 b->Nbs = B->cmap->N / bs; 2701 2702 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2703 b->rstartbs = B->rmap->rstart / bs; 2704 b->rendbs = B->rmap->rend / bs; 2705 b->cstartbs = B->cmap->rstart / bs; 2706 b->cendbs = B->cmap->rend / bs; 2707 2708 #if defined(PETSC_USE_CTABLE) 2709 PetscCall(PetscHMapIDestroy(&b->colmap)); 2710 #else 2711 PetscCall(PetscFree(b->colmap)); 2712 #endif 2713 PetscCall(PetscFree(b->garray)); 2714 PetscCall(VecDestroy(&b->lvec)); 2715 PetscCall(VecScatterDestroy(&b->Mvctx)); 2716 2717 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2718 2719 MatSeqXAIJGetOptions_Private(b->B); 2720 PetscCall(MatDestroy(&b->B)); 2721 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2722 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2723 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2724 MatSeqXAIJRestoreOptions_Private(b->B); 2725 2726 MatSeqXAIJGetOptions_Private(b->A); 2727 PetscCall(MatDestroy(&b->A)); 2728 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2729 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2730 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2731 MatSeqXAIJRestoreOptions_Private(b->A); 2732 2733 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2734 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2735 B->preallocated = PETSC_TRUE; 2736 B->was_assembled = PETSC_FALSE; 2737 B->assembled = PETSC_FALSE; 2738 PetscFunctionReturn(PETSC_SUCCESS); 2739 } 2740 2741 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2742 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2743 2744 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2745 { 2746 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2747 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2748 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2749 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2750 2751 PetscFunctionBegin; 2752 PetscCall(PetscMalloc1(M + 1, &ii)); 2753 ii[0] = 0; 2754 for (i = 0; i < M; i++) { 2755 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2756 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2757 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2758 /* remove one from count of matrix has diagonal */ 2759 for (j = id[i]; j < id[i + 1]; j++) { 2760 if (jd[j] == i) { 2761 ii[i + 1]--; 2762 break; 2763 } 2764 } 2765 } 2766 PetscCall(PetscMalloc1(ii[M], &jj)); 2767 cnt = 0; 2768 for (i = 0; i < M; i++) { 2769 for (j = io[i]; j < io[i + 1]; j++) { 2770 if (garray[jo[j]] > rstart) break; 2771 jj[cnt++] = garray[jo[j]]; 2772 } 2773 for (k = id[i]; k < id[i + 1]; k++) { 2774 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2775 } 2776 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2777 } 2778 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2779 PetscFunctionReturn(PETSC_SUCCESS); 2780 } 2781 2782 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2783 2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2785 2786 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2787 { 2788 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2789 Mat_MPIAIJ *b; 2790 Mat B; 2791 2792 PetscFunctionBegin; 2793 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2794 2795 if (reuse == MAT_REUSE_MATRIX) { 2796 B = *newmat; 2797 } else { 2798 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2799 PetscCall(MatSetType(B, MATMPIAIJ)); 2800 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2801 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2802 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2803 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2804 } 2805 b = (Mat_MPIAIJ *)B->data; 2806 2807 if (reuse == MAT_REUSE_MATRIX) { 2808 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2809 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2810 } else { 2811 PetscInt *garray = a->garray; 2812 Mat_SeqAIJ *bB; 2813 PetscInt bs, nnz; 2814 PetscCall(MatDestroy(&b->A)); 2815 PetscCall(MatDestroy(&b->B)); 2816 /* just clear out the data structure */ 2817 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2818 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2819 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2820 2821 /* Global numbering for b->B columns */ 2822 bB = (Mat_SeqAIJ *)b->B->data; 2823 bs = A->rmap->bs; 2824 nnz = bB->i[A->rmap->n]; 2825 for (PetscInt k = 0; k < nnz; k++) { 2826 PetscInt bj = bB->j[k] / bs; 2827 PetscInt br = bB->j[k] % bs; 2828 bB->j[k] = garray[bj] * bs + br; 2829 } 2830 } 2831 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2832 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2833 2834 if (reuse == MAT_INPLACE_MATRIX) { 2835 PetscCall(MatHeaderReplace(A, &B)); 2836 } else { 2837 *newmat = B; 2838 } 2839 PetscFunctionReturn(PETSC_SUCCESS); 2840 } 2841 2842 /*MC 2843 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2844 2845 Options Database Keys: 2846 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2847 . -mat_block_size <bs> - set the blocksize used to store the matrix 2848 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2849 - -mat_use_hash_table <fact> - set hash table factor 2850 2851 Level: beginner 2852 2853 Note: 2854 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2855 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2856 2857 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2858 M*/ 2859 2860 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2861 2862 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2863 { 2864 Mat_MPIBAIJ *b; 2865 PetscBool flg = PETSC_FALSE; 2866 2867 PetscFunctionBegin; 2868 PetscCall(PetscNew(&b)); 2869 B->data = (void *)b; 2870 B->ops[0] = MatOps_Values; 2871 B->assembled = PETSC_FALSE; 2872 2873 B->insertmode = NOT_SET_VALUES; 2874 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2875 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2876 2877 /* build local table of row and column ownerships */ 2878 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2879 2880 /* build cache for off array entries formed */ 2881 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2882 2883 b->donotstash = PETSC_FALSE; 2884 b->colmap = NULL; 2885 b->garray = NULL; 2886 b->roworiented = PETSC_TRUE; 2887 2888 /* stuff used in block assembly */ 2889 b->barray = NULL; 2890 2891 /* stuff used for matrix vector multiply */ 2892 b->lvec = NULL; 2893 b->Mvctx = NULL; 2894 2895 /* stuff for MatGetRow() */ 2896 b->rowindices = NULL; 2897 b->rowvalues = NULL; 2898 b->getrowactive = PETSC_FALSE; 2899 2900 /* hash table stuff */ 2901 b->ht = NULL; 2902 b->hd = NULL; 2903 b->ht_size = 0; 2904 b->ht_flag = PETSC_FALSE; 2905 b->ht_fact = 0; 2906 b->ht_total_ct = 0; 2907 b->ht_insert_ct = 0; 2908 2909 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2910 b->ijonly = PETSC_FALSE; 2911 2912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2915 #if defined(PETSC_HAVE_HYPRE) 2916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2917 #endif 2918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2925 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2926 2927 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2928 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2929 if (flg) { 2930 PetscReal fact = 1.39; 2931 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2932 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2933 if (fact <= 1.0) fact = 1.39; 2934 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2935 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2936 } 2937 PetscOptionsEnd(); 2938 PetscFunctionReturn(PETSC_SUCCESS); 2939 } 2940 2941 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2942 /*MC 2943 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2944 2945 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2946 and `MATMPIBAIJ` otherwise. 2947 2948 Options Database Keys: 2949 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2950 2951 Level: beginner 2952 2953 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2954 M*/ 2955 2956 /*@ 2957 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2958 (block compressed row). 2959 2960 Collective 2961 2962 Input Parameters: 2963 + B - the matrix 2964 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2965 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2966 . d_nz - number of block nonzeros per block row in diagonal portion of local 2967 submatrix (same for all local rows) 2968 . d_nnz - array containing the number of block nonzeros in the various block rows 2969 of the in diagonal portion of the local (possibly different for each block 2970 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2971 set it even if it is zero. 2972 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2973 submatrix (same for all local rows). 2974 - o_nnz - array containing the number of nonzeros in the various block rows of the 2975 off-diagonal portion of the local submatrix (possibly different for 2976 each block row) or `NULL`. 2977 2978 If the *_nnz parameter is given then the *_nz parameter is ignored 2979 2980 Options Database Keys: 2981 + -mat_block_size - size of the blocks to use 2982 - -mat_use_hash_table <fact> - set hash table factor 2983 2984 Level: intermediate 2985 2986 Notes: 2987 For good matrix assembly performance 2988 the user should preallocate the matrix storage by setting the parameters 2989 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 2990 performance can be increased by more than a factor of 50. 2991 2992 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2993 than it must be used on all processors that share the object for that argument. 2994 2995 Storage Information: 2996 For a square global matrix we define each processor's diagonal portion 2997 to be its local rows and the corresponding columns (a square submatrix); 2998 each processor's off-diagonal portion encompasses the remainder of the 2999 local matrix (a rectangular submatrix). 3000 3001 The user can specify preallocated storage for the diagonal part of 3002 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3003 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3004 memory allocation. Likewise, specify preallocated storage for the 3005 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3006 3007 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3008 the figure below we depict these three local rows and all columns (0-11). 3009 3010 .vb 3011 0 1 2 3 4 5 6 7 8 9 10 11 3012 -------------------------- 3013 row 3 |o o o d d d o o o o o o 3014 row 4 |o o o d d d o o o o o o 3015 row 5 |o o o d d d o o o o o o 3016 -------------------------- 3017 .ve 3018 3019 Thus, any entries in the d locations are stored in the d (diagonal) 3020 submatrix, and any entries in the o locations are stored in the 3021 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3022 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3023 3024 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3025 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3026 In general, for PDE problems in which most nonzeros are near the diagonal, 3027 one expects `d_nz` >> `o_nz`. 3028 3029 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3030 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3031 You can also run with the option `-info` and look for messages with the string 3032 malloc in them to see if additional memory allocation was needed. 3033 3034 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3035 @*/ 3036 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3037 { 3038 PetscFunctionBegin; 3039 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3040 PetscValidType(B, 1); 3041 PetscValidLogicalCollectiveInt(B, bs, 2); 3042 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3043 PetscFunctionReturn(PETSC_SUCCESS); 3044 } 3045 3046 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3047 /*@ 3048 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3049 (block compressed row). 3050 3051 Collective 3052 3053 Input Parameters: 3054 + comm - MPI communicator 3055 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3056 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3057 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3058 This value should be the same as the local size used in creating the 3059 y vector for the matrix-vector product y = Ax. 3060 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3061 This value should be the same as the local size used in creating the 3062 x vector for the matrix-vector product y = Ax. 3063 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3064 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3065 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3066 submatrix (same for all local rows) 3067 . d_nnz - array containing the number of nonzero blocks in the various block rows 3068 of the in diagonal portion of the local (possibly different for each block 3069 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3070 and set it even if it is zero. 3071 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3072 submatrix (same for all local rows). 3073 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3074 off-diagonal portion of the local submatrix (possibly different for 3075 each block row) or NULL. 3076 3077 Output Parameter: 3078 . A - the matrix 3079 3080 Options Database Keys: 3081 + -mat_block_size - size of the blocks to use 3082 - -mat_use_hash_table <fact> - set hash table factor 3083 3084 Level: intermediate 3085 3086 Notes: 3087 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3088 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3089 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3090 3091 For good matrix assembly performance 3092 the user should preallocate the matrix storage by setting the parameters 3093 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3094 performance can be increased by more than a factor of 50. 3095 3096 If the *_nnz parameter is given then the *_nz parameter is ignored 3097 3098 A nonzero block is any block that as 1 or more nonzeros in it 3099 3100 The user MUST specify either the local or global matrix dimensions 3101 (possibly both). 3102 3103 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3104 than it must be used on all processors that share the object for that argument. 3105 3106 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3107 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3108 3109 Storage Information: 3110 For a square global matrix we define each processor's diagonal portion 3111 to be its local rows and the corresponding columns (a square submatrix); 3112 each processor's off-diagonal portion encompasses the remainder of the 3113 local matrix (a rectangular submatrix). 3114 3115 The user can specify preallocated storage for the diagonal part of 3116 the local submatrix with either d_nz or d_nnz (not both). Set 3117 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3118 memory allocation. Likewise, specify preallocated storage for the 3119 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3120 3121 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3122 the figure below we depict these three local rows and all columns (0-11). 3123 3124 .vb 3125 0 1 2 3 4 5 6 7 8 9 10 11 3126 -------------------------- 3127 row 3 |o o o d d d o o o o o o 3128 row 4 |o o o d d d o o o o o o 3129 row 5 |o o o d d d o o o o o o 3130 -------------------------- 3131 .ve 3132 3133 Thus, any entries in the d locations are stored in the d (diagonal) 3134 submatrix, and any entries in the o locations are stored in the 3135 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3136 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3137 3138 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3139 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3140 In general, for PDE problems in which most nonzeros are near the diagonal, 3141 one expects `d_nz` >> `o_nz`. 3142 3143 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3144 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3145 @*/ 3146 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3147 { 3148 PetscMPIInt size; 3149 3150 PetscFunctionBegin; 3151 PetscCall(MatCreate(comm, A)); 3152 PetscCall(MatSetSizes(*A, m, n, M, N)); 3153 PetscCallMPI(MPI_Comm_size(comm, &size)); 3154 if (size > 1) { 3155 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3156 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3157 } else { 3158 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3159 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3160 } 3161 PetscFunctionReturn(PETSC_SUCCESS); 3162 } 3163 3164 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3165 { 3166 Mat mat; 3167 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3168 PetscInt len = 0; 3169 3170 PetscFunctionBegin; 3171 *newmat = NULL; 3172 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3173 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3174 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3175 3176 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3177 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3178 if (matin->hash_active) { 3179 PetscCall(MatSetUp(mat)); 3180 } else { 3181 mat->factortype = matin->factortype; 3182 mat->preallocated = PETSC_TRUE; 3183 mat->assembled = PETSC_TRUE; 3184 mat->insertmode = NOT_SET_VALUES; 3185 3186 a = (Mat_MPIBAIJ *)mat->data; 3187 mat->rmap->bs = matin->rmap->bs; 3188 a->bs2 = oldmat->bs2; 3189 a->mbs = oldmat->mbs; 3190 a->nbs = oldmat->nbs; 3191 a->Mbs = oldmat->Mbs; 3192 a->Nbs = oldmat->Nbs; 3193 3194 a->size = oldmat->size; 3195 a->rank = oldmat->rank; 3196 a->donotstash = oldmat->donotstash; 3197 a->roworiented = oldmat->roworiented; 3198 a->rowindices = NULL; 3199 a->rowvalues = NULL; 3200 a->getrowactive = PETSC_FALSE; 3201 a->barray = NULL; 3202 a->rstartbs = oldmat->rstartbs; 3203 a->rendbs = oldmat->rendbs; 3204 a->cstartbs = oldmat->cstartbs; 3205 a->cendbs = oldmat->cendbs; 3206 3207 /* hash table stuff */ 3208 a->ht = NULL; 3209 a->hd = NULL; 3210 a->ht_size = 0; 3211 a->ht_flag = oldmat->ht_flag; 3212 a->ht_fact = oldmat->ht_fact; 3213 a->ht_total_ct = 0; 3214 a->ht_insert_ct = 0; 3215 3216 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3217 if (oldmat->colmap) { 3218 #if defined(PETSC_USE_CTABLE) 3219 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3220 #else 3221 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3222 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3223 #endif 3224 } else a->colmap = NULL; 3225 3226 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3227 PetscCall(PetscMalloc1(len, &a->garray)); 3228 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3229 } else a->garray = NULL; 3230 3231 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3232 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3233 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3234 3235 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3236 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3237 } 3238 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3239 *newmat = mat; 3240 PetscFunctionReturn(PETSC_SUCCESS); 3241 } 3242 3243 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3244 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3245 { 3246 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3247 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3248 PetscScalar *matvals; 3249 3250 PetscFunctionBegin; 3251 PetscCall(PetscViewerSetUp(viewer)); 3252 3253 /* read in matrix header */ 3254 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3255 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3256 M = header[1]; 3257 N = header[2]; 3258 nz = header[3]; 3259 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3260 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3261 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3262 3263 /* set block sizes from the viewer's .info file */ 3264 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3265 /* set local sizes if not set already */ 3266 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3267 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3268 /* set global sizes if not set already */ 3269 if (mat->rmap->N < 0) mat->rmap->N = M; 3270 if (mat->cmap->N < 0) mat->cmap->N = N; 3271 PetscCall(PetscLayoutSetUp(mat->rmap)); 3272 PetscCall(PetscLayoutSetUp(mat->cmap)); 3273 3274 /* check if the matrix sizes are correct */ 3275 PetscCall(MatGetSize(mat, &rows, &cols)); 3276 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3277 PetscCall(MatGetBlockSize(mat, &bs)); 3278 PetscCall(MatGetLocalSize(mat, &m, &n)); 3279 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3280 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3281 mbs = m / bs; 3282 nbs = n / bs; 3283 3284 /* read in row lengths and build row indices */ 3285 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3286 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3287 rowidxs[0] = 0; 3288 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3289 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3290 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3291 3292 /* read in column indices and matrix values */ 3293 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3294 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3295 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3296 3297 { /* preallocate matrix storage */ 3298 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3299 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3300 PetscBool sbaij, done; 3301 PetscInt *d_nnz, *o_nnz; 3302 3303 PetscCall(PetscBTCreate(nbs, &bt)); 3304 PetscCall(PetscHSetICreate(&ht)); 3305 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3306 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3307 for (i = 0; i < mbs; i++) { 3308 PetscCall(PetscBTMemzero(nbs, bt)); 3309 PetscCall(PetscHSetIClear(ht)); 3310 for (k = 0; k < bs; k++) { 3311 PetscInt row = bs * i + k; 3312 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3313 PetscInt col = colidxs[j]; 3314 if (!sbaij || col >= row) { 3315 if (col >= cs && col < ce) { 3316 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3317 } else { 3318 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3319 if (done) o_nnz[i]++; 3320 } 3321 } 3322 } 3323 } 3324 } 3325 PetscCall(PetscBTDestroy(&bt)); 3326 PetscCall(PetscHSetIDestroy(&ht)); 3327 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3328 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3329 PetscCall(PetscFree2(d_nnz, o_nnz)); 3330 } 3331 3332 /* store matrix values */ 3333 for (i = 0; i < m; i++) { 3334 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3335 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3336 } 3337 3338 PetscCall(PetscFree(rowidxs)); 3339 PetscCall(PetscFree2(colidxs, matvals)); 3340 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3341 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3342 PetscFunctionReturn(PETSC_SUCCESS); 3343 } 3344 3345 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3346 { 3347 PetscBool isbinary; 3348 3349 PetscFunctionBegin; 3350 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3351 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3352 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3353 PetscFunctionReturn(PETSC_SUCCESS); 3354 } 3355 3356 /*@ 3357 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3358 3359 Input Parameters: 3360 + mat - the matrix 3361 - fact - factor 3362 3363 Options Database Key: 3364 . -mat_use_hash_table <fact> - provide the factor 3365 3366 Level: advanced 3367 3368 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3369 @*/ 3370 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3371 { 3372 PetscFunctionBegin; 3373 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3374 PetscFunctionReturn(PETSC_SUCCESS); 3375 } 3376 3377 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3378 { 3379 Mat_MPIBAIJ *baij; 3380 3381 PetscFunctionBegin; 3382 baij = (Mat_MPIBAIJ *)mat->data; 3383 baij->ht_fact = fact; 3384 PetscFunctionReturn(PETSC_SUCCESS); 3385 } 3386 3387 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3388 { 3389 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3390 PetscBool flg; 3391 3392 PetscFunctionBegin; 3393 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3394 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3395 if (Ad) *Ad = a->A; 3396 if (Ao) *Ao = a->B; 3397 if (colmap) *colmap = a->garray; 3398 PetscFunctionReturn(PETSC_SUCCESS); 3399 } 3400 3401 /* 3402 Special version for direct calls from Fortran (to eliminate two function call overheads 3403 */ 3404 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3405 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3406 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3407 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3408 #endif 3409 3410 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3411 /*@C 3412 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3413 3414 Collective 3415 3416 Input Parameters: 3417 + matin - the matrix 3418 . min - number of input rows 3419 . im - input rows 3420 . nin - number of input columns 3421 . in - input columns 3422 . v - numerical values input 3423 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3424 3425 Level: advanced 3426 3427 Developer Notes: 3428 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3429 3430 .seealso: `Mat`, `MatSetValuesBlocked()` 3431 @*/ 3432 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3433 { 3434 /* convert input arguments to C version */ 3435 Mat mat = *matin; 3436 PetscInt m = *min, n = *nin; 3437 InsertMode addv = *addvin; 3438 3439 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3440 const MatScalar *value; 3441 MatScalar *barray = baij->barray; 3442 PetscBool roworiented = baij->roworiented; 3443 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3444 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3445 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3446 3447 PetscFunctionBegin; 3448 /* tasks normally handled by MatSetValuesBlocked() */ 3449 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3450 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3451 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3452 if (mat->assembled) { 3453 mat->was_assembled = PETSC_TRUE; 3454 mat->assembled = PETSC_FALSE; 3455 } 3456 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3457 3458 if (!barray) { 3459 PetscCall(PetscMalloc1(bs2, &barray)); 3460 baij->barray = barray; 3461 } 3462 3463 if (roworiented) stepval = (n - 1) * bs; 3464 else stepval = (m - 1) * bs; 3465 3466 for (i = 0; i < m; i++) { 3467 if (im[i] < 0) continue; 3468 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3469 if (im[i] >= rstart && im[i] < rend) { 3470 row = im[i] - rstart; 3471 for (j = 0; j < n; j++) { 3472 /* If NumCol = 1 then a copy is not required */ 3473 if ((roworiented) && (n == 1)) { 3474 barray = (MatScalar *)v + i * bs2; 3475 } else if ((!roworiented) && (m == 1)) { 3476 barray = (MatScalar *)v + j * bs2; 3477 } else { /* Here a copy is required */ 3478 if (roworiented) { 3479 value = v + i * (stepval + bs) * bs + j * bs; 3480 } else { 3481 value = v + j * (stepval + bs) * bs + i * bs; 3482 } 3483 for (ii = 0; ii < bs; ii++, value += stepval) { 3484 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3485 } 3486 barray -= bs2; 3487 } 3488 3489 if (in[j] >= cstart && in[j] < cend) { 3490 col = in[j] - cstart; 3491 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3492 } else if (in[j] < 0) { 3493 continue; 3494 } else { 3495 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3496 if (mat->was_assembled) { 3497 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3498 3499 #if defined(PETSC_USE_DEBUG) 3500 #if defined(PETSC_USE_CTABLE) 3501 { 3502 PetscInt data; 3503 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3504 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3505 } 3506 #else 3507 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3508 #endif 3509 #endif 3510 #if defined(PETSC_USE_CTABLE) 3511 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3512 col = (col - 1) / bs; 3513 #else 3514 col = (baij->colmap[in[j]] - 1) / bs; 3515 #endif 3516 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3517 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3518 col = in[j]; 3519 } 3520 } else col = in[j]; 3521 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3522 } 3523 } 3524 } else { 3525 if (!baij->donotstash) { 3526 if (roworiented) { 3527 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3528 } else { 3529 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3530 } 3531 } 3532 } 3533 } 3534 3535 /* task normally handled by MatSetValuesBlocked() */ 3536 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3537 PetscFunctionReturn(PETSC_SUCCESS); 3538 } 3539 3540 /*@ 3541 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3542 3543 Collective 3544 3545 Input Parameters: 3546 + comm - MPI communicator 3547 . bs - the block size, only a block size of 1 is supported 3548 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3549 . n - This value should be the same as the local size used in creating the 3550 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3551 calculated if `N` is given) For square matrices `n` is almost always `m`. 3552 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3553 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3554 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3555 . j - column indices 3556 - a - matrix values 3557 3558 Output Parameter: 3559 . mat - the matrix 3560 3561 Level: intermediate 3562 3563 Notes: 3564 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3565 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3566 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3567 3568 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3569 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3570 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3571 with column-major ordering within blocks. 3572 3573 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3574 3575 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3576 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3577 @*/ 3578 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3579 { 3580 PetscFunctionBegin; 3581 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3582 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3583 PetscCall(MatCreate(comm, mat)); 3584 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3585 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3586 PetscCall(MatSetBlockSize(*mat, bs)); 3587 PetscCall(MatSetUp(*mat)); 3588 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3589 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3590 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3591 PetscFunctionReturn(PETSC_SUCCESS); 3592 } 3593 3594 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3595 { 3596 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3597 PetscInt *indx; 3598 PetscScalar *values; 3599 3600 PetscFunctionBegin; 3601 PetscCall(MatGetSize(inmat, &m, &N)); 3602 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3603 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3604 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3605 PetscInt *bindx, rmax = a->rmax, j; 3606 PetscMPIInt rank, size; 3607 3608 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3609 mbs = m / bs; 3610 Nbs = N / cbs; 3611 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3612 nbs = n / cbs; 3613 3614 PetscCall(PetscMalloc1(rmax, &bindx)); 3615 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3616 3617 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3618 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3619 if (rank == size - 1) { 3620 /* Check sum(nbs) = Nbs */ 3621 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3622 } 3623 3624 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3625 for (i = 0; i < mbs; i++) { 3626 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3627 nnz = nnz / bs; 3628 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3629 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3630 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3631 } 3632 PetscCall(PetscFree(bindx)); 3633 3634 PetscCall(MatCreate(comm, outmat)); 3635 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3636 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3637 PetscCall(MatSetType(*outmat, MATBAIJ)); 3638 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3639 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3640 MatPreallocateEnd(dnz, onz); 3641 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3642 } 3643 3644 /* numeric phase */ 3645 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3646 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3647 3648 for (i = 0; i < m; i++) { 3649 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3650 Ii = i + rstart; 3651 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3652 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3653 } 3654 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3655 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3656 PetscFunctionReturn(PETSC_SUCCESS); 3657 } 3658