#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*
   Destroys a parallel blocked AIJ matrix: releases the local diagonal (A) and
   off-diagonal (B) sequential blocks, the stashes used for off-process entries,
   the column map and garray, communication objects, the hash-table arrays, and
   finally clears all composed function pointers installed on the object.
*/
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* reset the type name and remove every composed method so the object can be retyped */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

/*
   Computes, for each local row, the entry of maximum absolute value and
   (optionally) its global column index. The diagonal block A and off-diagonal
   block B are scanned separately and merged; B's block-column indices are
   translated to global columns through garray. On ties between A and B the
   smaller global column index is kept.
*/
static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      /* A's indices are local to the diagonal block; shift to global numbering.
         Rows whose max is exactly zero are left untouched. */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      /* off-diagonal part wins; map B's (block,offset) index to a global column */
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      /* on a tie, prefer the smaller global column (idxb[i] == -1 means B row was empty) */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the sum of absolute values of each local row by summing the
   row-sums of the diagonal and off-diagonal blocks.
*/
static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB)); /* vA += vB: combine both contributions */
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Saves the numerical values of both local blocks so they can be restored later. */
static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the numerical values previously saved with MatStoreValues_MPIBAIJ(). */
static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* hash-map variant: key = global block column + 1, value = local point column + 1
     (the +1 offsets keep 0 free as the "not present" sentinel) */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* dense variant: one entry per global block column; 0 means "not present" */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Inserts one scalar entry (point row/col, in local numbering) into the diagonal
   block A. Relies on caller-scope locals (a, aa, ai, aj, aimax, ailen, bs, bs2,
   rp, ap, nrow, rmax, low, high, t, _i, N, brow, bcol, ridx, cidx, bap).
   Does a short binary search then a linear scan over the block row; if the block
   column is absent it reallocates (MatSeqXAIJReallocateAIJ rebinds aa/ai/aj and
   friends) and shifts later entries up to open a slot. orow/ocol are the original
   global indices, used only in the error message.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block column already present: update the (ridx,cidx) entry in place */ \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently ignored */ \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

/*
   Identical to MatSetValues_SeqBAIJ_A_Private() but targets the off-diagonal
   block B (locals b, ba, bi, bj, bimax, bilen; label b_noinsert).
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/*
   Sets values using global point (not block) indices. Locally owned rows are
   routed to the diagonal block A (columns inside the local column range) or the
   off-diagonal block B (all other columns); off-process rows are stashed for
   communication during assembly. If the matrix was already assembled, global
   columns for B must be translated through colmap; a brand-new off-diagonal
   column triggers MatDisAssemble_MPIBAIJ(), which rebuilds B with global column
   numbering, so B's cached pointers are refreshed afterwards.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the local diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* B uses compacted local column numbering after assembly: translate */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1; /* 0 (absent) maps to -1 */
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* column not yet in B: disassemble so B reverts to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private()
                 since MatDisAssemble_MPIBAIJ() replaced baij->B */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* colmap stores the block start; add the point offset */
            }
          } else col = in[j]; /* before first assembly B uses global column numbers */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash the whole row for assembly-time scatter */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Inserts one bs-by-bs block (block row/col in the local numbering of A) into a
   sequential BAIJ matrix. v points at the block values; `is` selects insert vs
   add; orow/ocol are the original global block indices for error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search narrows the range, then the loop below scans linearly */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col)
high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: combine v into the stored column-major block */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* v is row-major: stride through the column-major block with step bs */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        /* v is column-major like the storage: copy column by column */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij   = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2 staging buffer used to repack one block */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/cols of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to B's compacted local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs; /* colmap is in point units; convert to blocks */
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* brand-new off-diagonal block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* multiplicative hash on the golden ratio; tmp receives the scaled key */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues variant used when the nonzero pattern is frozen and a hash table
   (built by MatCreateHashTable_MPIBAIJ_Private()) maps (block row, block col)
   keys directly to the stored block's data pointer. New nonzeros cannot be
   introduced; a missing key is an error.
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt    h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal   tmp;
  MatScalar **HD = baij->hd, value;
  /* NOTE(review): total_ct/insert_ct start from the stored counters and are
     added back with += below, which appears to double-count the previous
     totals — debug-only statistics; confirm intent */
  PetscInt    total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned point row */
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up PetscInto the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1; /* 1-based block (row,col) key */
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same linear probe as below, but also counts probe steps */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) { /* wrap around to the front of the table */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          /* linear probing with wrap-around; the key must exist */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Blocked-index MatSetValues through the hash table: keys are
   (block row)*Nbs + (block col) + 1 and HD[idx] points at the stored bs2 block.
   The nonzero pattern is frozen; a missing key is an error.
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block units (rstartbs) but rend is taken from
     mat->rmap->rend, which is in point units — the ownership test below mixes
     the two scales; confirm against the upstream source */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) { /* wrap around */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          /* row-major input scattered into the column-major stored block */
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) { /* off-process block row: stash for assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Retrieves values at global point indices. Only locally owned rows are
   supported; off-diagonal columns are translated through colmap and entries
   not stored in B are returned as 0.0.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j,
bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        /* column lives in the diagonal block */
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal: translate the global block column through colmap */
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--; /* absent key (0) becomes -1 */
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
        /* block column not present in B on this process: the entry is implicitly zero */
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes matrix norms for the parallel BAIJ format.
     NORM_FROBENIUS: local sums of squares over A and B, reduced with MPI_SUM.
     NORM_1:         per-column absolute sums; B's contributions are scattered
                     back to their global columns via VecSetValuesBlocked().
     NORM_INFINITY:  per-row absolute sums over both blocks, reduced with MPI_MAX.
   The single-process case defers entirely to the sequential MatNorm().
*/
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      Vec          col, bcol;
      PetscScalar *array;
      PetscInt    *jj, *garray = baij->garray;

      /* accumulate |a_ij| per global column into the distributed vector `col` */
      PetscCall(MatCreateVecs(mat, &col, NULL));
      PetscCall(VecSet(col, 0.0));
      PetscCall(VecGetArrayWrite(col, &array));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index (shadows the Vec `col` intentionally here) */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      PetscCall(VecRestoreArrayWrite(col, &array));
      /* B's sums are indexed by local ghost columns; push them to the owning
         processes through garray block indices */
      PetscCall(MatCreateVecs(baij->B, &bcol, NULL));
      PetscCall(VecSet(bcol, 0.0));
      PetscCall(VecGetArrayWrite(bcol, &array));
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      PetscCall(VecSetValuesBlocked(col, bmat->nbs, garray, array, ADD_VALUES));
      PetscCall(VecRestoreArrayWrite(bcol, &array));
      PetscCall(VecDestroy(&bcol));
      PetscCall(VecAssemblyBegin(col));
      PetscCall(VecAssemblyEnd(col));
      PetscCall(VecNorm(col, NORM_INFINITY, nrm));
      PetscCall(VecDestroy(&col));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* one pass per block row: accumulate the bs point-row sums over A and B */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates the hash table, and sets the table
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
/* Build the (global block index + 1) -> block-data hash table used by the
   MatSetValues_MPIBAIJ_HT fast insertion path. Table size is factor*(local
   nonzero blocks); collisions are resolved by linear probing. A key of 0
   marks an empty slot, hence the "+1" in the key below. */
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* probe statistics: total collisions and longest probe */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); /* table already built; rebuild requires destroy first */

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probing from the hashed slot until a free slot is found */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* garray maps local off-diagonal block columns to global block columns */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communicating stashed off-process entries (both point stash and
   block stash) to their owning ranks; logs stash usage statistics. */
static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Receive the stashed off-process entries, insert them locally, assemble the
   diagonal (A) and off-diagonal (B) blocks, handle disassembly/reassembly,
   and optionally install the hash-table MatSetValues fast path. */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, all_assembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block is bs2 values, hence the val + i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* first final assembly with hash tables requested: build the table and
     switch MatSetValues to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/* Shared viewer for ASCII, draw, and socket viewers. For detailed ASCII info it
   prints per-rank statistics; otherwise it gathers the whole matrix onto rank 0
   and views it there. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt  rank = baij->rank;
  PetscInt
bs = mat->rmap->bs;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics; no entries are printed */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_FACTOR_INFO) PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N)); /* rank 0 owns everything */
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* blocks are stored column-major: each pass inserts one column of the block */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs; /* map to global block column */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,
MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* only rank 0 holds data; view its sequential diagonal block */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix in PETSc binary (point-wise AIJ) format: header, row
   lengths, globally-sorted column indices, then values. Off-diagonal (B)
   entries with global column below the diagonal range come first, then the
   diagonal (A) entries, then the remaining B entries. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point-wise nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B-part columns left of the diagonal block range come first */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining B-part columns right of the diagonal range */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same traversal order as the column-index pass; blocks are column-major,
         so point (k,l) of block j lives at a[bs*(bs*j + l) + k] */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch MatView() to the viewer-type-specific implementation. */
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A*x. The scatter of ghost values into lvec is overlapped with the
   diagonal-block product; the off-diagonal contribution is then added. */
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x, same communication pattern as MatMult. */
static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x: local transpose products, then a reverse scatter-add folds the
   off-diagonal contribution back to the owning ranks. */
static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x, same pattern as MatMultTranspose. */
static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale both the diagonal (A) and off-diagonal (B) blocks by aa. */
static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one (point) row of the parallel matrix, merging the diagonal (A)
   and off-diagonal (B) parts so that the returned column indices are globally
   sorted: B-columns left of the diagonal range, then A, then the rest of B.
   Work arrays sized for the longest local row are allocated lazily and freed
   at assembly. Must be paired with MatRestoreRow. */
static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from A/B what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries left of the diagonal range */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark has not been computed yet */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion of MatGetRow; only clears the active flag (the work arrays are
   reused across calls and freed at assembly time). */
static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all entries while keeping the nonzero structure. */
static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather MatInfo statistics from the A and B blocks; reduce across ranks for
   the MAT_GLOBAL_MAX / MAT_GLOBAL_SUM flavors. */
static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size =
(PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* accumulate the local statistics of the diagonal block ... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* ... plus those of the off-diagonal block */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Apply a MatOption: most options are forwarded to both sequential blocks;
   stash and hash-table options only affect the parallel layer. */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default table-size factor; see MatSetHashTableFactor */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Form the transpose by inserting each local block, column by column, into a
   new (or reused) matrix with swapped row/column layouts. */
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
aj = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals holds the bs consecutive global point-row indices of block row i;
       inserting with rows/columns swapped produces the transpose */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs; /* map to global block column */
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B)); /* MAT_INPLACE_MATRIX: replace A by its transpose */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A = diag(ll)*A*diag(rr). The left scaling applies directly to both blocks;
   the right scaling of the off-diagonal block needs the ghost values of rr,
   whose scatter is overlapped with the diagonal-block scaling. */
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the locally owned rows listed in rows[] (global indices), optionally
   placing diag on the diagonal and fixing b so that x keeps satisfying the
   zeroed equations. */
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square layouts: the sequential code can set the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by one */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero both the rows and the columns listed in rows[] (global point indices).
   Ownership of the requested rows is resolved with a PetscSF reduction; the
   column part of the off-diagonal block is masked via a scatter of indicator
   values, so no communication of the column indices themselves is needed. */
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* mark zeroed rows with 1 and scatter, so lmask flags the ghost columns to remove */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1706 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1707 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1708 PetscCall(VecGetArray(b, &bb)); 1709 } 1710 PetscCall(VecGetArray(lmask, &mask)); 1711 /* remove zeroed rows of off-diagonal matrix */ 1712 for (i = 0; i < len; ++i) { 1713 row = lrows[i]; 1714 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1715 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1716 for (k = 0; k < count; ++k) { 1717 aa[0] = 0.0; 1718 aa += bs; 1719 } 1720 } 1721 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1722 for (i = 0; i < l->B->rmap->N; ++i) { 1723 row = i / bs; 1724 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1725 for (k = 0; k < bs; ++k) { 1726 col = bs * baij->j[j] + k; 1727 if (PetscAbsScalar(mask[col])) { 1728 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1729 if (x) bb[i] -= aa[0] * xx[col]; 1730 aa[0] = 0.0; 1731 } 1732 } 1733 } 1734 } 1735 if (x) { 1736 PetscCall(VecRestoreArray(b, &bb)); 1737 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1738 } 1739 PetscCall(VecRestoreArray(lmask, &mask)); 1740 PetscCall(VecDestroy(&lmask)); 1741 PetscCall(PetscFree(lrows)); 1742 1743 /* only change matrix nonzero state if pattern was allowed to be changed */ 1744 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1745 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1746 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1747 } 1748 PetscFunctionReturn(PETSC_SUCCESS); 1749 } 1750 1751 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1752 { 1753 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1754 1755 PetscFunctionBegin; 1756 PetscCall(MatSetUnfactored(a->A)); 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1761 1762 static 
PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  /* Two MPIBAIJ matrices are equal iff both their diagonal (A) and off-diagonal (B)
     sequential blocks are equal on every rank; the result is reduced with logical AND. */
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg)); /* skip the second comparison once inequality is known */
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B; falls back to the generic (slow) copy unless both matrices share the
   same copy implementation and the same nonzero pattern. */
static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute per-(block-)row nonzero counts for preallocating Y += X when the patterns differ;
   xltog/yltog map the off-diagonal blocks' compressed column indices to global indices. */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  /* Set the number of blocks in each row of the merged pattern of X and Y */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y.  SAME_NONZERO_PATTERN uses a raw BLAS axpy on the value arrays;
   SUBSET uses the generic path; otherwise a new matrix with the merged pattern is built
   and merged back into Y. */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the nonzero value arrays line up entry-for-entry */
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* unknown relation between patterns: preallocate the union pattern, compute into B,
       then steal B's guts into Y via MatHeaderMerge() */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex conjugate of all entries; a no-op for real scalars */
static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace all entries by their real parts (delegated to the two sequential blocks) */
static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace all entries by their imaginary parts (delegated to the two sequential blocks) */
static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix mat[isrow, iscol]; the column IS is gathered to all ranks and
   cached on the result (as "ISAllGather") so MAT_REUSE_MATRIX can find it again. */
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert scalar index sets to block index sets for the block-compressed extraction */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* the intermediate sequential submatrix was cached on *newmat under "SubMatrix" */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* work in block rows/columns from here on */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* both preallocation calls are issued; each is a no-op unless M is of the matching type */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
      The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* copy the local sequential submatrix into the distributed result, one block row at a time */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permute rows and columns of A: B = P_row * A * P_col, implemented as a submatrix
   extraction with permutation index sets. */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the number of ghost (off-process) block columns and their global block indices;
   both outputs are optional and reference internal storage (not copies). */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather the full (block-compressed) nonzero structure of the parallel matrix onto every
   rank as a sequential AIJ matrix of block size 1; numerical values are NOT gathered. */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts)); /* single allocation: first half counts, second half displacements */
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over; column indices must be emitted in
     globally increasing order, merging the off-diagonal (bd) and diagonal (ad) parts */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into usable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Block-Jacobi/processor-local SOR: each sweep scatters the ghost values of xx, moves the
   off-diagonal contribution B*x to the right-hand side, and runs the sequential SOR on
   the local diagonal block. Only the SOR_LOCAL_* variants are supported in parallel. */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (the modified rhs) is only needed when at least one non-zero-guess sweep runs */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP,
                                     fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a per-column reduction (norm, sum, or mean of real/imaginary parts) over the
   whole parallel matrix; reductions[] must have length = global number of columns.
   Each rank accumulates into a global-length work array (diagonal block columns are
   shifted by cmap->rstart, off-diagonal columns mapped through garray), then the work
   arrays are combined with MPI max (NORM_INFINITY) or sum (all other types). */
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work));
  if (type == NORM_2) {
    /* accumulate |a|^2 per column; square root taken after the global reduction */
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < N; i++) reductions[i] /= m; /* mean over the number of global rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the block diagonal (delegated to the local diagonal block); also propagates any
   factorization-error diagnostics back up to the parallel matrix. */
static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A,
                                  values));
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I. Makes sure the diagonal block has (at least) one preallocated entry per
   row before delegating to MatShift_Basic(). */
static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* re-preallocating resets nonew; preserve the caller's new-nonzero-handling setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (a borrowed reference to) the local diagonal block */
static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop stored zeros from both local blocks */
static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Virtual function table for MATMPIBAIJ; the /*nn*/ markers give the slot index in _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPIBAIJ,
                                       /*69*/ NULL,
                                       NULL,
                                       NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       /*74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       /*94*/ NULL,
                                       NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*114*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       MatSetBlockSizes_Default,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       /*129*/ NULL,
                                       MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       NULL,
                                       /*139*/ NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash,
                                       NULL,
                                       NULL};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Preallocate (and optionally fill) the matrix from a local block-CSR description:
   ii[]/jj[] are per-block-row pointers and block column indices (global block numbering),
   V the optional bs*bs blocks of values. */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* block-row/column bounds of the local diagonal portion */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* count diagonal vs off-diagonal blocks per block row for exact preallocation */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0,
                                      d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* if no values supplied, insert zeros (one scratch row of bs*bs blocks) so the
     nonzero locations are still established */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries are local by construction, so skip the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B  - the matrix
. bs - the block size
. i  - the indices into `j` for the start of each local row (starts with zero)
. j  - the column indices for each local row (starts with zero) these must be sorted for each row
- v  - optional values in the matrix, use `NULL` if not provided

  Level: advanced

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine.

  The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
  may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
  over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
  `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
  block column and the second index is over columns within a block.

  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatches to MatMPIBAIJSetPreallocationCSR_MPIBAIJ() when B is of a matching type */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based insertion mode: restore the cached function table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }
b->bs2 = bs * bs; 2680 b->mbs = B->rmap->n / bs; 2681 b->nbs = B->cmap->n / bs; 2682 b->Mbs = B->rmap->N / bs; 2683 b->Nbs = B->cmap->N / bs; 2684 2685 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2686 b->rstartbs = B->rmap->rstart / bs; 2687 b->rendbs = B->rmap->rend / bs; 2688 b->cstartbs = B->cmap->rstart / bs; 2689 b->cendbs = B->cmap->rend / bs; 2690 2691 #if defined(PETSC_USE_CTABLE) 2692 PetscCall(PetscHMapIDestroy(&b->colmap)); 2693 #else 2694 PetscCall(PetscFree(b->colmap)); 2695 #endif 2696 PetscCall(PetscFree(b->garray)); 2697 PetscCall(VecDestroy(&b->lvec)); 2698 PetscCall(VecScatterDestroy(&b->Mvctx)); 2699 2700 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2701 2702 MatSeqXAIJGetOptions_Private(b->B); 2703 PetscCall(MatDestroy(&b->B)); 2704 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2705 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2706 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2707 MatSeqXAIJRestoreOptions_Private(b->B); 2708 2709 MatSeqXAIJGetOptions_Private(b->A); 2710 PetscCall(MatDestroy(&b->A)); 2711 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2712 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2713 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2714 MatSeqXAIJRestoreOptions_Private(b->A); 2715 2716 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2717 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2718 B->preallocated = PETSC_TRUE; 2719 B->was_assembled = PETSC_FALSE; 2720 B->assembled = PETSC_FALSE; 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2725 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2726 2727 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2728 { 2729 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2730 Mat_SeqBAIJ *d = 
(Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2731 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2732 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2733 2734 PetscFunctionBegin; 2735 PetscCall(PetscMalloc1(M + 1, &ii)); 2736 ii[0] = 0; 2737 for (i = 0; i < M; i++) { 2738 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2739 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2740 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2741 /* remove one from count of matrix has diagonal */ 2742 for (j = id[i]; j < id[i + 1]; j++) { 2743 if (jd[j] == i) { 2744 ii[i + 1]--; 2745 break; 2746 } 2747 } 2748 } 2749 PetscCall(PetscMalloc1(ii[M], &jj)); 2750 cnt = 0; 2751 for (i = 0; i < M; i++) { 2752 for (j = io[i]; j < io[i + 1]; j++) { 2753 if (garray[jo[j]] > rstart) break; 2754 jj[cnt++] = garray[jo[j]]; 2755 } 2756 for (k = id[i]; k < id[i + 1]; k++) { 2757 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2758 } 2759 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2760 } 2761 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2766 2767 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2768 2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2770 { 2771 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2772 Mat_MPIAIJ *b; 2773 Mat B; 2774 2775 PetscFunctionBegin; 2776 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2777 2778 if (reuse == 
MAT_REUSE_MATRIX) { 2779 B = *newmat; 2780 } else { 2781 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2782 PetscCall(MatSetType(B, MATMPIAIJ)); 2783 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2784 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2785 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2786 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2787 } 2788 b = (Mat_MPIAIJ *)B->data; 2789 2790 if (reuse == MAT_REUSE_MATRIX) { 2791 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2792 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2793 } else { 2794 PetscInt *garray = a->garray; 2795 Mat_SeqAIJ *bB; 2796 PetscInt bs, nnz; 2797 PetscCall(MatDestroy(&b->A)); 2798 PetscCall(MatDestroy(&b->B)); 2799 /* just clear out the data structure */ 2800 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2801 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2802 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2803 2804 /* Global numbering for b->B columns */ 2805 bB = (Mat_SeqAIJ *)b->B->data; 2806 bs = A->rmap->bs; 2807 nnz = bB->i[A->rmap->n]; 2808 for (PetscInt k = 0; k < nnz; k++) { 2809 PetscInt bj = bB->j[k] / bs; 2810 PetscInt br = bB->j[k] % bs; 2811 bB->j[k] = garray[bj] * bs + br; 2812 } 2813 } 2814 PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 2815 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2816 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2817 PetscCall(MatSetOption(B, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 2818 2819 if (reuse == MAT_INPLACE_MATRIX) { 2820 PetscCall(MatHeaderReplace(A, &B)); 2821 } else { 2822 *newmat = B; 2823 } 2824 PetscFunctionReturn(PETSC_SUCCESS); 2825 } 2826 2827 /*MC 2828 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/*
  Type constructor for MATMPIBAIJ: allocates and zero-initializes the implementation struct,
  installs the function table, composes the type-specific methods, and processes the
  -mat_use_hash_table option. Preallocation happens later via MatMPIBAIJSetPreallocation().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships (filled in block units at preallocation time) */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off-array entries formed during assembly */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose the type-specific methods dispatched by the generic Mat interface;
     the string names must match the PetscTryMethod()/PetscUseMethod() call sites exactly */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39; /* default hash table oversizing factor; must exceed 1.0 */
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@
  MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
  (block compressed row).

  Collective

  Input Parameters:
+ B - the matrix
. bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. d_nz - number of block nonzeros per block row in diagonal portion of local
         submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
          set it even if it is zero.
.
o_nz - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Level: intermediate

  Notes:
  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).
2994 2995 .vb 2996 0 1 2 3 4 5 6 7 8 9 10 11 2997 -------------------------- 2998 row 3 |o o o d d d o o o o o o 2999 row 4 |o o o d d d o o o o o o 3000 row 5 |o o o d d d o o o o o o 3001 -------------------------- 3002 .ve 3003 3004 Thus, any entries in the d locations are stored in the d (diagonal) 3005 submatrix, and any entries in the o locations are stored in the 3006 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3007 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3008 3009 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3010 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3011 In general, for PDE problems in which most nonzeros are near the diagonal, 3012 one expects `d_nz` >> `o_nz`. 3013 3014 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3015 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3016 You can also run with the option `-info` and look for messages with the string 3017 malloc in them to see if additional memory allocation was needed. 

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatches to MatMPIBAIJSetPreallocation_MPIBAIJ(); a no-op for matrix types that do not compose the method */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@
  MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
  (block compressed row).

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
. n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzero blocks per block row in diagonal portion of local
         submatrix (same for all local rows)
.
d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
. o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  A nonzero block is any block that has 1 or more nonzeros in it

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3090 3091 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3092 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3093 3094 Storage Information: 3095 For a square global matrix we define each processor's diagonal portion 3096 to be its local rows and the corresponding columns (a square submatrix); 3097 each processor's off-diagonal portion encompasses the remainder of the 3098 local matrix (a rectangular submatrix). 3099 3100 The user can specify preallocated storage for the diagonal part of 3101 the local submatrix with either d_nz or d_nnz (not both). Set 3102 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3103 memory allocation. Likewise, specify preallocated storage for the 3104 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3105 3106 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3107 the figure below we depict these three local rows and all columns (0-11). 3108 3109 .vb 3110 0 1 2 3 4 5 6 7 8 9 10 11 3111 -------------------------- 3112 row 3 |o o o d d d o o o o o o 3113 row 4 |o o o d d d o o o o o o 3114 row 5 |o o o d d d o o o o o o 3115 -------------------------- 3116 .ve 3117 3118 Thus, any entries in the d locations are stored in the d (diagonal) 3119 submatrix, and any entries in the o locations are stored in the 3120 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3121 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3122 3123 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3124 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3125 In general, for PDE problems in which most nonzeros are near the diagonal, 3126 one expects `d_nz` >> `o_nz`. 

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
          `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* on one rank the matrix is purely sequential; o_nz/o_nnz are then irrelevant */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Duplicates a MATMPIBAIJ matrix: copies the block bookkeeping, colmap/garray tables, stash,
  local vector and scatter, and finally duplicates the two sequential pieces. Per-call scratch
  state (MatGetRow() buffers, hash table, barray) is reset, not copied.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash-insertion mode: nothing to copy beyond setup */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    /* garray has one entry per off-diagonal block column of oldmat->B */
    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (prefix sum turns lengths into offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* for each block row, count distinct diagonal/off-diagonal block columns touched by any
       scalar row of the block; SBAIJ keeps only the upper triangle (col >= row) */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls use PetscTryMethod internally, so only the one matching mat's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad() entry point for MATMPIBAIJ; currently only the binary viewer is supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

  Input Parameters:
+ mat - the matrix
- fact - factor

  Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor

  Level: advanced

.seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation if the matrix type provides one */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MATMPIBAIJ implementation of MatMPIBAIJSetHashTableFactor(): simply records the factor */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the diagonal (Ad) and off-diagonal (Ao) sequential blocks of a MATMPIBAIJ matrix,
   plus the local-to-global column map (garray) for the off-diagonal block.
   The returned objects are borrowed references owned by A; do not destroy them. */
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

// PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

  Collective

  Input Parameters:
+ matin  - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* scratch buffer holding one bs x bs block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the block scratch buffer, cached on the matrix */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row index means skip */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned locally */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the strided block into contiguous barray */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* diagonal portion: local column, insert into baij->A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative column index means skip */
        } else {
          /* off-diagonal portion: map global block column to local column of baij->B */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores 1-based scaled entries; 0 means "not present" */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              /* column not in current off-diagonal structure: disassemble and fall back to global index */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3552 3553 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3554 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3555 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3556 with column-major ordering within blocks. 3557 3558 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3559 3560 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3561 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3562 @*/ 3563 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3564 { 3565 PetscFunctionBegin; 3566 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3567 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3568 PetscCall(MatCreate(comm, mat)); 3569 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3570 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3571 PetscCall(MatSetBlockSize(*mat, bs)); 3572 PetscCall(MatSetUp(*mat)); 3573 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3574 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3575 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3576 PetscFunctionReturn(PETSC_SUCCESS); 3577 } 3578 3579 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3580 { 3581 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3582 PetscInt *indx; 3583 PetscScalar *values; 3584 3585 PetscFunctionBegin; 3586 PetscCall(MatGetSize(inmat, &m, &N)); 3587 if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3588 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3589 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3590 PetscInt *bindx, rmax = a->rmax, j; 3591 PetscMPIInt rank, size; 3592 3593 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3594 mbs = m / bs; 3595 Nbs = N / cbs; 3596 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3597 nbs = n / cbs; 3598 3599 PetscCall(PetscMalloc1(rmax, &bindx)); 3600 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3601 3602 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3603 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3604 if (rank == size - 1) { 3605 /* Check sum(nbs) = Nbs */ 3606 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3607 } 3608 3609 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3610 for (i = 0; i < mbs; i++) { 3611 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3612 nnz = nnz / bs; 3613 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3614 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3615 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3616 } 3617 PetscCall(PetscFree(bindx)); 3618 3619 PetscCall(MatCreate(comm, outmat)); 3620 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3621 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3622 PetscCall(MatSetType(*outmat, MATBAIJ)); 3623 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3624 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3625 MatPreallocateEnd(dnz, onz); 3626 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3627 } 3628 3629 /* numeric phase */ 3630 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3631 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3632 3633 for (i = 0; i < m; i++) { 3634 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3635 Ii = i + rstart; 3636 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3637 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3638 } 3639 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3640 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3641 PetscFunctionReturn(PETSC_SUCCESS); 3642 } 3643