1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 101 { 102 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 103 104 PetscFunctionBegin; 105 PetscCall(MatStoreValues(aij->A)); 106 PetscCall(MatStoreValues(aij->B)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 111 { 112 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 113 114 PetscFunctionBegin; 115 PetscCall(MatRetrieveValues(aij->A)); 116 PetscCall(MatRetrieveValues(aij->B)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 /* 121 Local utility routine that creates a mapping from the global column 122 number to the local number in the off-diagonal part of the local 123 storage of the matrix. This is done in a non scalable way since the 124 length of colmap equals the global matrix length. 125 */ 126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 127 { 128 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 129 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 130 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 131 132 PetscFunctionBegin; 133 #if defined(PETSC_USE_CTABLE) 134 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 135 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 136 #else 137 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 138 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 139 #endif 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 144 do { \ 145 brow = row / bs; \ 146 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 147 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 148 rmax = aimax[brow]; \ 149 nrow = ailen[brow]; \ 150 bcol = col / bs; \ 151 ridx = row % bs; \ 152 cidx = col % bs; \ 153 low = 0; \ 154 high = nrow; \ 155 while (high - low > 3) { \ 156 t = (low + high) / 2; \ 157 if (rp[t] > bcol) high = t; \ 158 else low = t; \ 159 } \ 160 for (_i = low; _i < high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2 * _i + bs * cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto a_noinsert; \ 167 } \ 168 } \ 169 if (a->nonew == 1) goto a_noinsert; \ 170 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 171 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 175 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 176 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 177 rp[_i] = bcol; \ 178 ap[bs2 * _i + bs * cidx + ridx] = value; \ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } while (0) 182 183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 184 do { \ 185 brow = row / bs; \ 186 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 187 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 188 rmax = bimax[brow]; \ 189 nrow = bilen[brow]; \ 190 bcol = col / bs; \ 191 ridx = row % bs; \ 192 cidx = col % bs; \ 193 low = 0; \ 194 high = nrow; \ 195 while (high - low > 3) { \ 196 t = (low + high) / 2; \ 197 if (rp[t] > bcol) high = t; \ 198 else low = t; \ 199 } \ 200 for (_i = low; _i < high; _i++) { \ 201 if (rp[_i] > bcol) break; \ 202 if (rp[_i] == bcol) { \ 203 bap = ap + bs2 * _i + bs * cidx + ridx; \ 204 if (addv == ADD_VALUES) *bap += value; \ 205 else *bap = value; \ 206 goto b_noinsert; \ 207 } \ 208 } \ 209 if (b->nonew == 1) goto b_noinsert; \ 210 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 211 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 212 N = nrow++ - 1; \ 213 /* shift up all the later entries in this row */ \ 214 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 215 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 216 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 217 rp[_i] = bcol; \ 218 ap[bs2 * _i + bs * cidx + ridx] = value; \ 219 b_noinsert:; \ 220 bilen[brow] = nrow; \ 221 } while (0) 222 223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 224 { 225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscInt i, j, row, col; 229 PetscInt rstart_orig = mat->rmap->rstart; 230 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 231 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 232 233 /* Some Variables required in the macro */ 234 Mat A = baij->A; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 236 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 237 MatScalar *aa = a->a; 238 239 Mat B = baij->B; 240 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 241 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 242 MatScalar *ba = b->a; 243 244 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 245 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 246 MatScalar *ap, *bap; 247 248 PetscFunctionBegin; 249 for (i = 0; i < m; i++) { 250 if (im[i] < 0) continue; 251 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { 253 row = im[i] - rstart_orig; 254 for (j = 0; j < n; j++) { 255 if (in[j] >= cstart_orig && in[j] < cend_orig) { 256 col = in[j] - cstart_orig; 257 if (roworiented) value = v[i * n + j]; 258 else value = v[i + j * m]; 259 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 260 } else if (in[j] < 0) { 261 continue; 262 } else { 263 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 264 if (mat->was_assembled) { 265 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 266 #if defined(PETSC_USE_CTABLE) 267 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 268 col = col - 1; 269 #else 270 col = baij->colmap[in[j] / bs] - 1; 271 #endif 272 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 273 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 274 col = in[j]; 275 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 276 B = baij->B; 277 b = (Mat_SeqBAIJ *)(B)->data; 278 bimax = b->imax; 279 bi = b->i; 280 bilen = b->ilen; 281 bj = b->j; 282 ba = b->a; 283 } else { 284 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 285 col += in[j] % bs; 286 } 287 } else col = in[j]; 288 if (roworiented) value = v[i * n + j]; 289 else value = v[i + j * m]; 290 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 291 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 292 } 293 } 294 } else { 295 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 296 if (!baij->donotstash) { 297 mat->assembled = PETSC_FALSE; 298 if (roworiented) { 299 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 300 } else { 301 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 302 } 303 } 304 } 305 } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 310 { 311 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 312 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 313 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 314 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 315 PetscBool roworiented = a->roworiented; 316 const PetscScalar *value = v; 317 MatScalar *ap, *aa = a->a, *bap; 318 319 PetscFunctionBegin; 320 rp = aj + ai[row]; 321 ap = aa + bs2 * ai[row]; 322 rmax = imax[row]; 323 nrow = ailen[row]; 324 value = v; 325 low = 0; 326 high = nrow; 327 while (high - low > 7) { 328 t = (low + high) / 2; 329 if (rp[t] > col) high = t; 330 else low = t; 331 } 332 for (i = low; i < high; i++) { 333 if (rp[i] > col) break; 334 if (rp[i] == col) { 335 bap = ap + bs2 * i; 336 if (roworiented) { 337 if (is == ADD_VALUES) { 338 for (ii = 0; ii < bs; ii++) { 339 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 340 } 341 } else { 342 for (ii = 0; ii < bs; ii++) { 343 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 344 } 345 } 346 } else { 347 if (is == ADD_VALUES) { 348 for (ii = 0; ii < bs; ii++, value += bs) { 349 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 350 bap += bs; 351 } 352 } else { 353 for (ii = 0; ii < bs; ii++, value += bs) { 354 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 355 bap += bs; 356 } 357 } 358 } 359 goto noinsert2; 360 } 361 } 362 if (nonew == 1) goto noinsert2; 363 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 364 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 365 N = nrow++ - 1; 366 high++; 367 /* shift up all the later entries in this row */ 368 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 369 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 370 rp[i] = col; 371 bap = ap + bs2 * i; 372 if (roworiented) { 373 for (ii = 0; ii < bs; ii++) { 374 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 375 } 376 } else { 377 for (ii = 0; ii < bs; ii++) { 378 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 379 } 380 } 381 noinsert2:; 382 ailen[row] = nrow; 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 /* 387 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 388 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 389 */ 390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 393 const PetscScalar *value; 394 MatScalar *barray = baij->barray; 395 PetscBool roworiented = baij->roworiented; 396 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 397 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 398 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 399 400 PetscFunctionBegin; 401 if (!barray) { 402 PetscCall(PetscMalloc1(bs2, &barray)); 403 baij->barray = barray; 404 } 405 406 if (roworiented) stepval = (n - 1) * bs; 407 else stepval = (m - 1) * bs; 408 409 for (i = 0; i < m; i++) { 410 if (im[i] < 0) continue; 411 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 412 if (im[i] >= rstart && im[i] < rend) { 413 row = im[i] - rstart; 414 for (j = 0; j < n; j++) { 415 /* If NumCol = 1 then a copy is not required */ 416 if ((roworiented) && (n == 1)) { 417 barray = (MatScalar *)v + i * bs2; 418 } else if ((!roworiented) && (m == 1)) { 419 barray = (MatScalar *)v + j * bs2; 420 } else { /* Here a copy is required */ 421 if (roworiented) { 422 value = v + (i * (stepval + bs) + j) * bs; 423 } else { 424 value = v + (j * (stepval + bs) + i) * bs; 425 } 426 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 427 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 428 barray += bs; 429 } 430 barray -= bs2; 431 } 432 433 if (in[j] >= cstart && in[j] < cend) { 434 col = in[j] - cstart; 435 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 436 } else if (in[j] < 0) { 437 continue; 438 } else { 439 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 440 if (mat->was_assembled) { 441 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 442 443 #if defined(PETSC_USE_DEBUG) 444 #if defined(PETSC_USE_CTABLE) 445 { 446 PetscInt data; 447 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 448 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 449 } 450 #else 451 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 452 #endif 453 #endif 454 #if defined(PETSC_USE_CTABLE) 455 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 456 col = (col - 1) / bs; 457 #else 458 col = (baij->colmap[in[j]] - 1) / bs; 459 #endif 460 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 461 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 462 col = in[j]; 463 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 464 } else col = in[j]; 465 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 466 } 467 } 468 } else { 469 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 470 if (!baij->donotstash) { 471 if (roworiented) { 472 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 473 } else { 474 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(PETSC_SUCCESS); 480 } 481 482 #define HASH_KEY 0.6180339887 483 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 487 { 488 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 489 PetscBool roworiented = baij->roworiented; 490 PetscInt i, j, row, col; 491 PetscInt rstart_orig = mat->rmap->rstart; 492 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 493 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 494 PetscReal tmp; 495 MatScalar **HD = baij->hd, value; 496 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 497 498 PetscFunctionBegin; 499 for (i = 0; i < m; i++) { 500 if (PetscDefined(USE_DEBUG)) { 501 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 502 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 503 } 504 row = im[i]; 505 if (row >= rstart_orig && row < rend_orig) { 506 for (j = 0; j < n; j++) { 507 col = in[j]; 508 if (roworiented) value = v[i * n + j]; 509 else value = v[i + j * m]; 510 /* Look up PetscInto the Hash Table */ 511 key = (row / bs) * Nbs + (col / bs) + 1; 512 h1 = HASH(size, key, tmp); 513 514 idx = h1; 515 if (PetscDefined(USE_DEBUG)) { 516 insert_ct++; 517 total_ct++; 518 if (HT[idx] != key) { 519 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 520 ; 521 if (idx == size) { 522 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 523 ; 524 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 525 } 526 } 527 } else if (HT[idx] != key) { 528 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 529 ; 530 if (idx == size) { 531 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 532 ; 533 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 534 } 535 } 536 /* A HASH table entry is found, so insert the values at the correct address */ 537 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 538 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 539 } 540 } else if (!baij->donotstash) { 541 if (roworiented) { 542 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 543 } else { 544 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 545 } 546 } 547 } 548 if (PetscDefined(USE_DEBUG)) { 549 baij->ht_total_ct += total_ct; 550 baij->ht_insert_ct += insert_ct; 551 } 552 PetscFunctionReturn(PETSC_SUCCESS); 553 } 554 555 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 556 { 557 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 558 PetscBool roworiented = baij->roworiented; 559 PetscInt i, j, ii, jj, row, col; 560 PetscInt rstart = baij->rstartbs; 561 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 562 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 563 PetscReal tmp; 564 MatScalar **HD = baij->hd, *baij_a; 565 const PetscScalar *v_t, *value; 566 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 567 568 PetscFunctionBegin; 569 if (roworiented) stepval = (n - 1) * bs; 570 else stepval = (m - 1) * bs; 571 572 for (i = 0; i < m; i++) { 573 if (PetscDefined(USE_DEBUG)) { 574 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 575 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 576 } 577 row = im[i]; 578 v_t = v + i * nbs2; 579 if (row >= rstart && row < rend) { 580 for (j = 0; j < n; j++) { 581 col = in[j]; 582 583 /* Look up into the Hash Table */ 584 key = row * Nbs + col + 1; 585 h1 = HASH(size, key, tmp); 586 587 idx = h1; 588 if (PetscDefined(USE_DEBUG)) { 589 total_ct++; 590 insert_ct++; 591 if (HT[idx] != key) { 592 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 593 ; 594 if (idx == size) { 595 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 596 ; 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 602 ; 603 if (idx == size) { 604 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 605 ; 606 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 607 } 608 } 609 baij_a = HD[idx]; 610 if (roworiented) { 611 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 612 /* value = v + (i*(stepval+bs)+j)*bs; */ 613 value = v_t; 614 v_t += bs; 615 if (addv == ADD_VALUES) { 616 for (ii = 0; ii < bs; ii++, value += stepval) { 617 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 618 } 619 } else { 620 for (ii = 0; ii < bs; ii++, value += stepval) { 621 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 622 } 623 } 624 } else { 625 value = v + j * (stepval + bs) * bs + i * bs; 626 if (addv == ADD_VALUES) { 627 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 628 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 629 } 630 } else { 631 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 632 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 633 } 634 } 635 } 636 } 637 } else { 638 if (!baij->donotstash) { 639 if (roworiented) { 640 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } else { 642 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 643 } 644 } 645 } 646 } 647 if (PetscDefined(USE_DEBUG)) { 648 baij->ht_total_ct += total_ct; 649 baij->ht_insert_ct += insert_ct; 650 } 651 PetscFunctionReturn(PETSC_SUCCESS); 652 } 653 654 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 655 { 656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 657 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 658 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 659 660 PetscFunctionBegin; 661 for (i = 0; i < m; i++) { 662 if (idxm[i] < 0) continue; /* negative row */ 663 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 664 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 665 row = idxm[i] - bsrstart; 666 for (j = 0; j < n; j++) { 667 if (idxn[j] < 0) continue; /* negative column */ 668 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 669 if (idxn[j] >= bscstart && idxn[j] < bscend) { 670 col = idxn[j] - bscstart; 671 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 672 } else { 673 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 674 #if defined(PETSC_USE_CTABLE) 675 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 676 data--; 677 #else 678 data = baij->colmap[idxn[j] / bs] - 1; 679 #endif 680 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 681 else { 682 col = data + idxn[j] % bs; 683 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 684 } 685 } 686 } 687 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 688 } 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 695 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 696 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 697 PetscReal sum = 0.0; 698 MatScalar *v; 699 700 PetscFunctionBegin; 701 if (baij->size == 1) { 702 PetscCall(MatNorm(baij->A, type, nrm)); 703 } else { 704 if (type == NORM_FROBENIUS) { 705 v = amat->a; 706 nz = amat->nz * bs2; 707 for (i = 0; i < nz; i++) { 708 sum += PetscRealPart(PetscConj(*v) * (*v)); 709 v++; 710 } 711 v = bmat->a; 712 nz = bmat->nz * bs2; 713 for (i = 0; i < nz; i++) { 714 sum += PetscRealPart(PetscConj(*v) * (*v)); 715 v++; 716 } 717 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 718 *nrm = PetscSqrtReal(*nrm); 719 } else if (type == NORM_1) { /* max column sum */ 720 PetscReal *tmp, *tmp2; 721 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 722 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 723 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 col = bs * (cstart + *jj) + j; /* column index */ 729 for (row = 0; row < bs; row++) { 730 tmp[col] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 jj++; 735 } 736 v = bmat->a; 737 jj = bmat->j; 738 for (i = 0; i < bmat->nz; i++) { 739 for (j = 0; j < bs; j++) { 740 col = bs * garray[*jj] + j; 741 for (row = 0; row < bs; row++) { 742 tmp[col] += PetscAbsScalar(*v); 743 v++; 744 } 745 } 746 jj++; 747 } 748 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 749 *nrm = 0.0; 750 for (j = 0; j < mat->cmap->N; j++) { 751 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 752 } 753 PetscCall(PetscFree(tmp)); 754 PetscCall(PetscFree(tmp2)); 755 } else if (type == NORM_INFINITY) { /* max row sum */ 756 PetscReal *sums; 757 PetscCall(PetscMalloc1(bs, &sums)); 758 sum = 0.0; 759 for (j = 0; j < amat->mbs; j++) { 760 for (row = 0; row < bs; row++) sums[row] = 0.0; 761 v = amat->a + bs2 * amat->i[j]; 762 nz = amat->i[j + 1] - amat->i[j]; 763 for (i = 0; i < nz; i++) { 764 for (col = 0; col < bs; col++) { 765 for (row = 0; row < bs; row++) { 766 sums[row] += PetscAbsScalar(*v); 767 v++; 768 } 769 } 770 } 771 v = bmat->a + bs2 * bmat->i[j]; 772 nz = bmat->i[j + 1] - bmat->i[j]; 773 for (i = 0; i < nz; i++) { 774 for (col = 0; col < bs; col++) { 775 for (row = 0; row < bs; row++) { 776 sums[row] += PetscAbsScalar(*v); 777 v++; 778 } 779 } 780 } 781 for (row = 0; row < bs; row++) { 782 if (sums[row] > sum) sum = sums[row]; 783 } 784 } 785 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 786 PetscCall(PetscFree(sums)); 787 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 788 } 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 /* 793 Creates the hash table, and sets the table 794 This table is created only once. 795 If new entried need to be added to the matrix 796 then the hash table has to be destroyed and 797 recreated. 798 */ 799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 802 Mat A = baij->A, B = baij->B; 803 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 804 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 805 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 806 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 807 PetscInt *HT, key; 808 MatScalar **HD; 809 PetscReal tmp; 810 #if defined(PETSC_USE_INFO) 811 PetscInt ct = 0, max = 0; 812 #endif 813 814 PetscFunctionBegin; 815 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 816 817 baij->ht_size = (PetscInt)(factor * nz); 818 ht_size = baij->ht_size; 819 820 /* Allocate Memory for Hash Table */ 821 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 822 HD = baij->hd; 823 HT = baij->ht; 824 825 /* Loop Over A */ 826 for (i = 0; i < a->mbs; i++) { 827 for (j = ai[i]; j < ai[i + 1]; j++) { 828 row = i + rstart; 829 col = aj[j] + cstart; 830 831 key = row * Nbs + col + 1; 832 h1 = HASH(ht_size, key, tmp); 833 for (k = 0; k < ht_size; k++) { 834 if (!HT[(h1 + k) % ht_size]) { 835 HT[(h1 + k) % ht_size] = key; 836 HD[(h1 + k) % ht_size] = a->a + j * bs2; 837 break; 838 #if defined(PETSC_USE_INFO) 839 } else { 840 ct++; 841 #endif 842 } 843 } 844 #if defined(PETSC_USE_INFO) 845 if (k > max) max = k; 846 #endif 847 } 848 } 849 /* Loop Over B */ 850 for (i = 0; i < b->mbs; i++) { 851 for (j = bi[i]; j < bi[i + 1]; j++) { 852 row = i + rstart; 853 col = garray[bj[j]]; 854 key = row * Nbs + col + 1; 855 h1 = HASH(ht_size, key, tmp); 856 for (k = 0; k < ht_size; k++) { 857 if (!HT[(h1 + k) % ht_size]) { 858 HT[(h1 + k) % ht_size] = key; 859 HD[(h1 + k) % ht_size] = b->a + j * bs2; 860 break; 861 #if defined(PETSC_USE_INFO) 862 } else { 863 ct++; 864 #endif 865 } 866 } 867 #if defined(PETSC_USE_INFO) 868 if (k > max) max = k; 869 #endif 870 } 871 } 872 873 /* Print Summary */ 874 #if defined(PETSC_USE_INFO) 875 for (i = 0, j = 0; i < ht_size; i++) { 876 if (HT[i]) j++; 877 } 878 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 879 #endif 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 884 { 885 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 886 PetscInt nstash, reallocs; 887 888 PetscFunctionBegin; 889 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 890 891 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 893 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 896 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 901 { 902 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 904 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 905 PetscInt *row, *col; 906 PetscBool r1, r2, r3, other_disassembled; 907 MatScalar *val; 908 PetscMPIInt n; 909 910 PetscFunctionBegin; 911 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 912 if (!baij->donotstash && !mat->nooffprocentries) { 913 while (1) { 914 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 915 if (!flg) break; 916 917 for (i = 0; i < n;) { 918 /* Now identify the consecutive vals belonging to the same row */ 919 for (j = i, rstart = row[j]; j < n; j++) { 920 if (row[j] != rstart) break; 921 } 922 if (j < n) ncols = j - i; 923 else ncols = n - i; 924 /* Now assemble all these values with a single function call */ 925 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 926 i = j; 927 } 928 } 929 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 930 /* Now process the block-stash. Since the values are stashed column-oriented, 931 set the row-oriented flag to column-oriented, and after MatSetValues() 932 restore the original flags */ 933 r1 = baij->roworiented; 934 r2 = a->roworiented; 935 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 936 937 baij->roworiented = PETSC_FALSE; 938 a->roworiented = PETSC_FALSE; 939 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 940 while (1) { 941 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 942 if (!flg) break; 943 944 for (i = 0; i < n;) { 945 /* Now identify the consecutive vals belonging to the same row */ 946 for (j = i, rstart = row[j]; j < n; j++) { 947 if (row[j] != rstart) break; 948 } 949 if (j < n) ncols = j - i; 950 else ncols = n - i; 951 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 952 i = j; 953 } 954 } 955 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 956 957 baij->roworiented = r1; 958 a->roworiented = r2; 959 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 960 } 961 962 PetscCall(MatAssemblyBegin(baij->A, mode)); 963 PetscCall(MatAssemblyEnd(baij->A, mode)); 964 965 /* determine if any processor has disassembled, if so we must 966 also disassemble ourselves, in order that we may reassemble. */ 967 /* 968 if nonzero structure of submatrix B cannot change then we know that 969 no processor disassembled thus we can skip this stuff 970 */ 971 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 972 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 973 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 974 } 975 976 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 977 PetscCall(MatAssemblyBegin(baij->B, mode)); 978 PetscCall(MatAssemblyEnd(baij->B, mode)); 979 980 #if defined(PETSC_USE_INFO) 981 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 982 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 983 984 baij->ht_total_ct = 0; 985 baij->ht_insert_ct = 0; 986 } 987 #endif 988 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 989 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 990 991 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 992 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 993 } 994 995 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 996 997 baij->rowvalues = NULL; 998 999 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1000 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 1001 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1002 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1003 } 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1008 #include <petscdraw.h> 1009 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1010 { 1011 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1012 PetscMPIInt rank = baij->rank; 1013 PetscInt bs = mat->rmap->bs; 1014 PetscBool iascii, isdraw; 1015 PetscViewer sviewer; 1016 PetscViewerFormat format; 1017 1018 PetscFunctionBegin; 1019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1021 if (iascii) { 1022 PetscCall(PetscViewerGetFormat(viewer, &format)); 1023 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1024 MatInfo info; 1025 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1026 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1027 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1028 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1029 mat->rmap->bs, (double)info.memory)); 1030 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1032 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1033 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1034 PetscCall(PetscViewerFlush(viewer)); 1035 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1036 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1037 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1038 PetscFunctionReturn(PETSC_SUCCESS); 1039 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1040 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 } 1046 1047 if (isdraw) { 1048 PetscDraw draw; 1049 PetscBool isnull; 1050 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1051 PetscCall(PetscDrawIsNull(draw, &isnull)); 1052 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 { 1056 /* assemble the entire matrix onto first processor. */ 1057 Mat A; 1058 Mat_SeqBAIJ *Aloc; 1059 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1060 MatScalar *a; 1061 const char *matname; 1062 1063 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1064 /* Perhaps this should be the type of mat? */ 1065 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1066 if (rank == 0) { 1067 PetscCall(MatSetSizes(A, M, N, M, N)); 1068 } else { 1069 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1070 } 1071 PetscCall(MatSetType(A, MATMPIBAIJ)); 1072 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1073 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1074 1075 /* copy over the A part */ 1076 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1077 ai = Aloc->i; 1078 aj = Aloc->j; 1079 a = Aloc->a; 1080 PetscCall(PetscMalloc1(bs, &rvals)); 1081 1082 for (i = 0; i < mbs; i++) { 1083 rvals[0] = bs * (baij->rstartbs + i); 1084 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1085 for (j = ai[i]; j < ai[i + 1]; j++) { 1086 col = (baij->cstartbs + aj[j]) * bs; 1087 for (k = 0; k < bs; k++) { 1088 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1089 col++; 1090 a += bs; 1091 } 1092 } 1093 } 1094 /* copy over the B part */ 1095 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1096 ai = Aloc->i; 1097 aj = Aloc->j; 1098 a = Aloc->a; 1099 for (i = 0; i < mbs; i++) { 1100 rvals[0] = bs * (baij->rstartbs + i); 1101 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1102 for (j = ai[i]; j < ai[i + 1]; j++) { 1103 col = baij->garray[aj[j]] * bs; 1104 for (k = 0; k < bs; k++) { 1105 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1106 col++; 1107 a += bs; 1108 } 1109 } 1110 } 1111 PetscCall(PetscFree(rvals)); 1112 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1113 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1114 /* 1115 Everyone has to call to draw the matrix since the graphics waits are 1116 synchronized across all processors that share the PetscDraw object 1117 */ 1118 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1119 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1120 if (rank == 0) { 1121 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1122 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1123 } 1124 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1125 PetscCall(MatDestroy(&A)); 1126 } 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1131 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1132 { 1133 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1134 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1135 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1136 const PetscInt *garray = aij->garray; 1137 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1138 PetscInt64 nz, hnz; 1139 PetscInt *rowlens, *colidxs; 1140 PetscScalar *matvals; 1141 PetscMPIInt rank; 1142 1143 PetscFunctionBegin; 1144 PetscCall(PetscViewerSetUp(viewer)); 1145 1146 M = mat->rmap->N; 1147 N = mat->cmap->N; 1148 m = mat->rmap->n; 1149 rs = mat->rmap->rstart; 1150 cs = mat->cmap->rstart; 1151 bs = mat->rmap->bs; 1152 nz = bs * bs * (A->nz + B->nz); 1153 1154 /* write matrix header */ 1155 header[0] = MAT_FILE_CLASSID; 1156 header[1] = M; 1157 header[2] = N; 1158 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1159 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1160 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1161 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1162 1163 /* fill in and store row lengths */ 1164 PetscCall(PetscMalloc1(m, &rowlens)); 1165 for (cnt = 0, i = 0; i < A->mbs; i++) 1166 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1167 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1168 PetscCall(PetscFree(rowlens)); 1169 1170 /* fill in and store column indices */ 1171 PetscCall(PetscMalloc1(nz, &colidxs)); 1172 for (cnt = 0, i = 0; i < A->mbs; i++) { 1173 for (k = 0; k < bs; k++) { 1174 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1175 if (garray[B->j[jb]] > cs / bs) break; 1176 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1177 } 1178 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1179 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1180 for (; jb < B->i[i + 1]; jb++) 1181 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1182 } 1183 } 1184 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1185 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1186 PetscCall(PetscFree(colidxs)); 1187 1188 /* fill in and store nonzero values */ 1189 PetscCall(PetscMalloc1(nz, &matvals)); 1190 for (cnt = 0, i = 0; i < A->mbs; i++) { 1191 for (k = 0; k < bs; k++) { 1192 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1193 if (garray[B->j[jb]] > cs / bs) break; 1194 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1195 } 1196 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1197 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1198 for (; jb < B->i[i + 1]; jb++) 1199 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1200 } 1201 } 1202 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1203 PetscCall(PetscFree(matvals)); 1204 1205 /* write block size option to the viewer's .info file */ 1206 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1207 PetscFunctionReturn(PETSC_SUCCESS); 1208 } 1209 1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1211 { 1212 PetscBool iascii, isdraw, issocket, isbinary; 1213 1214 PetscFunctionBegin; 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1219 if (iascii || isdraw || issocket) { 1220 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1221 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1222 PetscFunctionReturn(PETSC_SUCCESS); 1223 } 1224 1225 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1226 { 1227 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1228 PetscInt nt; 1229 1230 PetscFunctionBegin; 1231 PetscCall(VecGetLocalSize(xx, &nt)); 1232 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1233 PetscCall(VecGetLocalSize(yy, &nt)); 1234 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1235 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1237 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1238 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1243 { 1244 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1245 1246 PetscFunctionBegin; 1247 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1249 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1250 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1255 { 1256 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1257 1258 PetscFunctionBegin; 1259 /* do nondiagonal part */ 1260 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1261 /* do local part */ 1262 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1263 /* add partial results together */ 1264 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1265 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1270 { 1271 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1272 1273 PetscFunctionBegin; 1274 /* do nondiagonal part */ 1275 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1276 /* do local part */ 1277 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1278 /* add partial results together */ 1279 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1280 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1281 PetscFunctionReturn(PETSC_SUCCESS); 1282 } 1283 1284 /* 1285 This only works correctly for square matrices where the subblock A->A is the 1286 diagonal block 1287 */ 1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1289 { 1290 PetscFunctionBegin; 1291 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1292 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1297 { 1298 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1299 1300 PetscFunctionBegin; 1301 PetscCall(MatScale(a->A, aa)); 1302 PetscCall(MatScale(a->B, aa)); 1303 PetscFunctionReturn(PETSC_SUCCESS); 1304 } 1305 1306 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1307 { 1308 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1309 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1310 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1311 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1312 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1313 1314 PetscFunctionBegin; 1315 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1316 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1317 mat->getrowactive = PETSC_TRUE; 1318 1319 if (!mat->rowvalues && (idx || v)) { 1320 /* 1321 allocate enough space to hold information from the longest row. 1322 */ 1323 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1324 PetscInt max = 1, mbs = mat->mbs, tmp; 1325 for (i = 0; i < mbs; i++) { 1326 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1327 if (max < tmp) max = tmp; 1328 } 1329 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1330 } 1331 lrow = row - brstart; 1332 1333 pvA = &vworkA; 1334 pcA = &cworkA; 1335 pvB = &vworkB; 1336 pcB = &cworkB; 1337 if (!v) { 1338 pvA = NULL; 1339 pvB = NULL; 1340 } 1341 if (!idx) { 1342 pcA = NULL; 1343 if (!v) pcB = NULL; 1344 } 1345 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1346 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1347 nztot = nzA + nzB; 1348 1349 cmap = mat->garray; 1350 if (v || idx) { 1351 if (nztot) { 1352 /* Sort by increasing column numbers, assuming A and B already sorted */ 1353 PetscInt imark = -1; 1354 if (v) { 1355 *v = v_p = mat->rowvalues; 1356 for (i = 0; i < nzB; i++) { 1357 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1358 else break; 1359 } 1360 imark = i; 1361 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1362 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1363 } 1364 if (idx) { 1365 *idx = idx_p = mat->rowindices; 1366 if (imark > -1) { 1367 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1368 } else { 1369 for (i = 0; i < nzB; i++) { 1370 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1371 else break; 1372 } 1373 imark = i; 1374 } 1375 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1376 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1377 } 1378 } else { 1379 if (idx) *idx = NULL; 1380 if (v) *v = NULL; 1381 } 1382 } 1383 *nz = nztot; 1384 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1385 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1390 { 1391 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1392 1393 PetscFunctionBegin; 1394 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1395 baij->getrowactive = PETSC_FALSE; 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1400 { 1401 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1402 1403 PetscFunctionBegin; 1404 PetscCall(MatZeroEntries(l->A)); 1405 PetscCall(MatZeroEntries(l->B)); 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1410 { 1411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1412 Mat A = a->A, B = a->B; 1413 PetscLogDouble isend[5], irecv[5]; 1414 1415 PetscFunctionBegin; 1416 info->block_size = (PetscReal)matin->rmap->bs; 1417 1418 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1419 1420 isend[0] = info->nz_used; 1421 isend[1] = info->nz_allocated; 1422 isend[2] = info->nz_unneeded; 1423 isend[3] = info->memory; 1424 isend[4] = info->mallocs; 1425 1426 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1427 1428 isend[0] += info->nz_used; 1429 isend[1] += info->nz_allocated; 1430 isend[2] += info->nz_unneeded; 1431 isend[3] += info->memory; 1432 isend[4] += info->mallocs; 1433 1434 if (flag == MAT_LOCAL) { 1435 info->nz_used = isend[0]; 1436 info->nz_allocated = isend[1]; 1437 info->nz_unneeded = isend[2]; 1438 info->memory = isend[3]; 1439 info->mallocs = isend[4]; 1440 } else if (flag == MAT_GLOBAL_MAX) { 1441 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1442 1443 info->nz_used = irecv[0]; 1444 info->nz_allocated = irecv[1]; 1445 info->nz_unneeded = irecv[2]; 1446 info->memory = irecv[3]; 1447 info->mallocs = irecv[4]; 1448 } else if (flag == MAT_GLOBAL_SUM) { 1449 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1450 1451 info->nz_used = irecv[0]; 1452 info->nz_allocated = irecv[1]; 1453 info->nz_unneeded = irecv[2]; 1454 info->memory = irecv[3]; 1455 info->mallocs = irecv[4]; 1456 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1457 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1458 info->fill_ratio_needed = 0; 1459 info->factor_mallocs = 0; 1460 PetscFunctionReturn(PETSC_SUCCESS); 1461 } 1462 1463 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1464 { 1465 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1466 1467 PetscFunctionBegin; 1468 switch (op) { 1469 case MAT_NEW_NONZERO_LOCATIONS: 1470 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1471 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1472 case MAT_KEEP_NONZERO_PATTERN: 1473 case MAT_NEW_NONZERO_LOCATION_ERR: 1474 MatCheckPreallocated(A, 1); 1475 PetscCall(MatSetOption(a->A, op, flg)); 1476 PetscCall(MatSetOption(a->B, op, flg)); 1477 break; 1478 case MAT_ROW_ORIENTED: 1479 MatCheckPreallocated(A, 1); 1480 a->roworiented = flg; 1481 1482 PetscCall(MatSetOption(a->A, op, flg)); 1483 PetscCall(MatSetOption(a->B, op, flg)); 1484 break; 1485 case MAT_FORCE_DIAGONAL_ENTRIES: 1486 case MAT_SORTED_FULL: 1487 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1488 break; 1489 case MAT_IGNORE_OFF_PROC_ENTRIES: 1490 a->donotstash = flg; 1491 break; 1492 case MAT_USE_HASH_TABLE: 1493 a->ht_flag = flg; 1494 a->ht_fact = 1.39; 1495 break; 1496 case MAT_SYMMETRIC: 1497 case MAT_STRUCTURALLY_SYMMETRIC: 1498 case MAT_HERMITIAN: 1499 case MAT_SUBMAT_SINGLEIS: 1500 case MAT_SYMMETRY_ETERNAL: 1501 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1502 case MAT_SPD_ETERNAL: 1503 /* if the diagonal matrix is square it inherits some of the properties above */ 1504 break; 1505 default: 1506 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1507 } 1508 PetscFunctionReturn(PETSC_SUCCESS); 1509 } 1510 1511 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1512 { 1513 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1514 Mat_SeqBAIJ *Aloc; 1515 Mat B; 1516 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1517 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1518 MatScalar *a; 1519 1520 PetscFunctionBegin; 1521 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1522 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1523 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1524 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1525 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1526 /* Do not know preallocation information, but must set block size */ 1527 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1528 } else { 1529 B = *matout; 1530 } 1531 1532 /* copy over the A part */ 1533 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1534 ai = Aloc->i; 1535 aj = Aloc->j; 1536 a = Aloc->a; 1537 PetscCall(PetscMalloc1(bs, &rvals)); 1538 1539 for (i = 0; i < mbs; i++) { 1540 rvals[0] = bs * (baij->rstartbs + i); 1541 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1542 for (j = ai[i]; j < ai[i + 1]; j++) { 1543 col = (baij->cstartbs + aj[j]) * bs; 1544 for (k = 0; k < bs; k++) { 1545 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1546 1547 col++; 1548 a += bs; 1549 } 1550 } 1551 } 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1554 ai = Aloc->i; 1555 aj = Aloc->j; 1556 a = Aloc->a; 1557 for (i = 0; i < mbs; i++) { 1558 rvals[0] = bs * (baij->rstartbs + i); 1559 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1560 for (j = ai[i]; j < ai[i + 1]; j++) { 1561 col = baij->garray[aj[j]] * bs; 1562 for (k = 0; k < bs; k++) { 1563 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1564 col++; 1565 a += bs; 1566 } 1567 } 1568 } 1569 PetscCall(PetscFree(rvals)); 1570 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1572 1573 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1574 else PetscCall(MatHeaderMerge(A, &B)); 1575 PetscFunctionReturn(PETSC_SUCCESS); 1576 } 1577 1578 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1579 { 1580 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1581 Mat a = baij->A, b = baij->B; 1582 PetscInt s1, s2, s3; 1583 1584 PetscFunctionBegin; 1585 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1586 if (rr) { 1587 PetscCall(VecGetLocalSize(rr, &s1)); 1588 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1589 /* Overlap communication with computation. */ 1590 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1591 } 1592 if (ll) { 1593 PetscCall(VecGetLocalSize(ll, &s1)); 1594 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1595 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1596 } 1597 /* scale the diagonal block */ 1598 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1599 1600 if (rr) { 1601 /* Do a scatter end and then right scale the off-diagonal block */ 1602 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1603 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1604 } 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1609 { 1610 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1611 PetscInt *lrows; 1612 PetscInt r, len; 1613 PetscBool cong; 1614 1615 PetscFunctionBegin; 1616 /* get locally owned rows */ 1617 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1618 /* fix right hand side if needed */ 1619 if (x && b) { 1620 const PetscScalar *xx; 1621 PetscScalar *bb; 1622 1623 PetscCall(VecGetArrayRead(x, &xx)); 1624 PetscCall(VecGetArray(b, &bb)); 1625 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1626 PetscCall(VecRestoreArrayRead(x, &xx)); 1627 PetscCall(VecRestoreArray(b, &bb)); 1628 } 1629 1630 /* actually zap the local rows */ 1631 /* 1632 Zero the required rows. If the "diagonal block" of the matrix 1633 is square and the user wishes to set the diagonal we use separate 1634 code so that MatSetValues() is not called for each diagonal allocating 1635 new memory, thus calling lots of mallocs and slowing things down. 1636 1637 */ 1638 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1639 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1640 PetscCall(MatHasCongruentLayouts(A, &cong)); 1641 if ((diag != 0.0) && cong) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1643 } else if (diag != 0.0) { 1644 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1645 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1646 for (r = 0; r < len; ++r) { 1647 const PetscInt row = lrows[r] + A->rmap->rstart; 1648 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1649 } 1650 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1651 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1652 } else { 1653 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1654 } 1655 PetscCall(PetscFree(lrows)); 1656 1657 /* only change matrix nonzero state if pattern was allowed to be changed */ 1658 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1659 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1660 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1661 } 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1666 { 1667 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1668 PetscMPIInt n = A->rmap->n, p = 0; 1669 PetscInt i, j, k, r, len = 0, row, col, count; 1670 PetscInt *lrows, *owners = A->rmap->range; 1671 PetscSFNode *rrows; 1672 PetscSF sf; 1673 const PetscScalar *xx; 1674 PetscScalar *bb, *mask; 1675 Vec xmask, lmask; 1676 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1677 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1678 PetscScalar *aa; 1679 1680 PetscFunctionBegin; 1681 /* Create SF where leaves are input rows and roots are owned rows */ 1682 PetscCall(PetscMalloc1(n, &lrows)); 1683 for (r = 0; r < n; ++r) lrows[r] = -1; 1684 PetscCall(PetscMalloc1(N, &rrows)); 1685 for (r = 0; r < N; ++r) { 1686 const PetscInt idx = rows[r]; 1687 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1688 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1689 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1690 } 1691 rrows[r].rank = p; 1692 rrows[r].index = rows[r] - owners[p]; 1693 } 1694 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1695 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1696 /* Collect flags for rows to be zeroed */ 1697 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1698 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1699 PetscCall(PetscSFDestroy(&sf)); 1700 /* Compress and put in row numbers */ 1701 for (r = 0; r < n; ++r) 1702 if (lrows[r] >= 0) lrows[len++] = r; 1703 /* zero diagonal part of matrix */ 1704 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1705 /* handle off-diagonal part of matrix */ 1706 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1707 PetscCall(VecDuplicate(l->lvec, &lmask)); 1708 PetscCall(VecGetArray(xmask, &bb)); 1709 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1710 PetscCall(VecRestoreArray(xmask, &bb)); 1711 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1712 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1713 PetscCall(VecDestroy(&xmask)); 1714 if (x) { 1715 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1716 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1717 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1718 PetscCall(VecGetArray(b, &bb)); 1719 } 1720 PetscCall(VecGetArray(lmask, &mask)); 1721 /* remove zeroed rows of off-diagonal matrix */ 1722 for (i = 0; i < len; ++i) { 1723 row = lrows[i]; 1724 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1725 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1726 for (k = 0; k < count; ++k) { 1727 aa[0] = 0.0; 1728 aa += bs; 1729 } 1730 } 1731 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1732 for (i = 0; i < l->B->rmap->N; ++i) { 1733 row = i / bs; 1734 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1735 for (k = 0; k < bs; ++k) { 1736 col = bs * baij->j[j] + k; 1737 if (PetscAbsScalar(mask[col])) { 1738 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1739 if (x) bb[i] -= aa[0] * xx[col]; 1740 aa[0] = 0.0; 1741 } 1742 } 1743 } 1744 } 1745 if (x) { 1746 PetscCall(VecRestoreArray(b, &bb)); 1747 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1748 } 1749 PetscCall(VecRestoreArray(lmask, &mask)); 1750 PetscCall(VecDestroy(&lmask)); 1751 PetscCall(PetscFree(lrows)); 1752 1753 /* only change matrix nonzero state if pattern was allowed to be changed */ 1754 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1755 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1756 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1757 } 1758 PetscFunctionReturn(PETSC_SUCCESS); 1759 } 1760 1761 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1762 { 1763 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1764 1765 PetscFunctionBegin; 1766 PetscCall(MatSetUnfactored(a->A)); 1767 PetscFunctionReturn(PETSC_SUCCESS); 1768 } 1769 1770 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1771 1772 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1773 { 1774 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1775 Mat a, b, c, d; 1776 PetscBool flg; 1777 1778 PetscFunctionBegin; 1779 a = matA->A; 1780 b = matA->B; 1781 c = matB->A; 1782 d = matB->B; 1783 1784 PetscCall(MatEqual(a, c, &flg)); 1785 if (flg) PetscCall(MatEqual(b, d, &flg)); 1786 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1787 PetscFunctionReturn(PETSC_SUCCESS); 1788 } 1789 1790 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1791 { 1792 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1793 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1794 1795 PetscFunctionBegin; 1796 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1797 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1798 PetscCall(MatCopy_Basic(A, B, str)); 1799 } else { 1800 PetscCall(MatCopy(a->A, b->A, str)); 1801 PetscCall(MatCopy(a->B, b->B, str)); 1802 } 1803 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1808 { 1809 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1810 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1811 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1812 1813 PetscFunctionBegin; 1814 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1815 PetscFunctionReturn(PETSC_SUCCESS); 1816 } 1817 1818 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1819 { 1820 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1821 PetscBLASInt bnz, one = 1; 1822 Mat_SeqBAIJ *x, *y; 1823 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1824 1825 PetscFunctionBegin; 1826 if (str == SAME_NONZERO_PATTERN) { 1827 PetscScalar alpha = a; 1828 x = (Mat_SeqBAIJ *)xx->A->data; 1829 y = (Mat_SeqBAIJ *)yy->A->data; 1830 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1831 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1832 x = (Mat_SeqBAIJ *)xx->B->data; 1833 y = (Mat_SeqBAIJ *)yy->B->data; 1834 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1835 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1836 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1837 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1838 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1839 } else { 1840 Mat B; 1841 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1842 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1843 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1844 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1845 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1846 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1847 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1848 PetscCall(MatSetType(B, MATMPIBAIJ)); 1849 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1850 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1851 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1852 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1853 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1854 PetscCall(MatHeaderMerge(Y, &B)); 1855 PetscCall(PetscFree(nnz_d)); 1856 PetscCall(PetscFree(nnz_o)); 1857 } 1858 PetscFunctionReturn(PETSC_SUCCESS); 1859 } 1860 1861 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1862 { 1863 PetscFunctionBegin; 1864 if (PetscDefined(USE_COMPLEX)) { 1865 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1866 1867 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1868 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1869 } 1870 PetscFunctionReturn(PETSC_SUCCESS); 1871 } 1872 1873 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1874 { 1875 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1876 1877 PetscFunctionBegin; 1878 PetscCall(MatRealPart(a->A)); 1879 PetscCall(MatRealPart(a->B)); 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1884 { 1885 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1886 1887 PetscFunctionBegin; 1888 PetscCall(MatImaginaryPart(a->A)); 1889 PetscCall(MatImaginaryPart(a->B)); 1890 PetscFunctionReturn(PETSC_SUCCESS); 1891 } 1892 1893 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1894 { 1895 IS iscol_local; 1896 PetscInt csize; 1897 1898 PetscFunctionBegin; 1899 PetscCall(ISGetLocalSize(iscol, &csize)); 1900 if (call == MAT_REUSE_MATRIX) { 1901 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1902 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1903 } else { 1904 PetscCall(ISAllGather(iscol, &iscol_local)); 1905 } 1906 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1907 if (call == MAT_INITIAL_MATRIX) { 1908 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1909 PetscCall(ISDestroy(&iscol_local)); 1910 } 1911 PetscFunctionReturn(PETSC_SUCCESS); 1912 } 1913 1914 /* 1915 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1916 in local and then by concatenating the local matrices the end result. 1917 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1918 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1919 */ 1920 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1921 { 1922 PetscMPIInt rank, size; 1923 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1924 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1925 Mat M, Mreuse; 1926 MatScalar *vwork, *aa; 1927 MPI_Comm comm; 1928 IS isrow_new, iscol_new; 1929 Mat_SeqBAIJ *aij; 1930 1931 PetscFunctionBegin; 1932 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1933 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1934 PetscCallMPI(MPI_Comm_size(comm, &size)); 1935 /* The compression and expansion should be avoided. Doesn't point 1936 out errors, might change the indices, hence buggey */ 1937 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1938 if (isrow == iscol) { 1939 iscol_new = isrow_new; 1940 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1941 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1942 1943 if (call == MAT_REUSE_MATRIX) { 1944 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1945 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1946 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1947 } else { 1948 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1949 } 1950 PetscCall(ISDestroy(&isrow_new)); 1951 PetscCall(ISDestroy(&iscol_new)); 1952 /* 1953 m - number of local rows 1954 n - number of columns (same on all processors) 1955 rstart - first row in new global matrix generated 1956 */ 1957 PetscCall(MatGetBlockSize(mat, &bs)); 1958 PetscCall(MatGetSize(Mreuse, &m, &n)); 1959 m = m / bs; 1960 n = n / bs; 1961 1962 if (call == MAT_INITIAL_MATRIX) { 1963 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1964 ii = aij->i; 1965 jj = aij->j; 1966 1967 /* 1968 Determine the number of non-zeros in the diagonal and off-diagonal 1969 portions of the matrix in order to do correct preallocation 1970 */ 1971 1972 /* first get start and end of "diagonal" columns */ 1973 if (csize == PETSC_DECIDE) { 1974 PetscCall(ISGetSize(isrow, &mglobal)); 1975 if (mglobal == n * bs) { /* square matrix */ 1976 nlocal = m; 1977 } else { 1978 nlocal = n / size + ((n % size) > rank); 1979 } 1980 } else { 1981 nlocal = csize / bs; 1982 } 1983 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1984 rstart = rend - nlocal; 1985 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1986 1987 /* next, compute all the lengths */ 1988 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1989 for (i = 0; i < m; i++) { 1990 jend = ii[i + 1] - ii[i]; 1991 olen = 0; 1992 dlen = 0; 1993 for (j = 0; j < jend; j++) { 1994 if (*jj < rstart || *jj >= rend) olen++; 1995 else dlen++; 1996 jj++; 1997 } 1998 olens[i] = olen; 1999 dlens[i] = dlen; 2000 } 2001 PetscCall(MatCreate(comm, &M)); 2002 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2003 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2004 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2005 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2006 PetscCall(PetscFree2(dlens, olens)); 2007 } else { 2008 PetscInt ml, nl; 2009 2010 M = *newmat; 2011 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2012 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2013 PetscCall(MatZeroEntries(M)); 2014 /* 2015 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2016 rather than the slower MatSetValues(). 2017 */ 2018 M->was_assembled = PETSC_TRUE; 2019 M->assembled = PETSC_FALSE; 2020 } 2021 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2022 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2023 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2024 ii = aij->i; 2025 jj = aij->j; 2026 aa = aij->a; 2027 for (i = 0; i < m; i++) { 2028 row = rstart / bs + i; 2029 nz = ii[i + 1] - ii[i]; 2030 cwork = jj; 2031 jj = PetscSafePointerPlusOffset(jj, nz); 2032 vwork = aa; 2033 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2034 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2035 } 2036 2037 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2038 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2039 *newmat = M; 2040 2041 /* save submatrix used in processor for next request */ 2042 if (call == MAT_INITIAL_MATRIX) { 2043 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2044 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2045 } 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2050 { 2051 MPI_Comm comm, pcomm; 2052 PetscInt clocal_size, nrows; 2053 const PetscInt *rows; 2054 PetscMPIInt size; 2055 IS crowp, lcolp; 2056 2057 PetscFunctionBegin; 2058 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2059 /* make a collective version of 'rowp' */ 2060 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2061 if (pcomm == comm) { 2062 crowp = rowp; 2063 } else { 2064 PetscCall(ISGetSize(rowp, &nrows)); 2065 PetscCall(ISGetIndices(rowp, &rows)); 2066 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2067 PetscCall(ISRestoreIndices(rowp, &rows)); 2068 } 2069 PetscCall(ISSetPermutation(crowp)); 2070 /* make a local version of 'colp' */ 2071 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2072 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2073 if (size == 1) { 2074 lcolp = colp; 2075 } else { 2076 PetscCall(ISAllGather(colp, &lcolp)); 2077 } 2078 PetscCall(ISSetPermutation(lcolp)); 2079 /* now we just get the submatrix */ 2080 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2081 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2082 /* clean up */ 2083 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2084 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2085 PetscFunctionReturn(PETSC_SUCCESS); 2086 } 2087 2088 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2089 { 2090 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2091 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2092 2093 PetscFunctionBegin; 2094 if (nghosts) *nghosts = B->nbs; 2095 if (ghosts) *ghosts = baij->garray; 2096 PetscFunctionReturn(PETSC_SUCCESS); 2097 } 2098 2099 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2100 { 2101 Mat B; 2102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2103 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2104 Mat_SeqAIJ *b; 2105 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2106 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2107 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2108 2109 PetscFunctionBegin; 2110 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2111 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2112 2113 /* Tell every processor the number of nonzeros per row */ 2114 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2115 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2116 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2117 displs = recvcounts + size; 2118 for (i = 0; i < size; i++) { 2119 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2120 displs[i] = A->rmap->range[i] / bs; 2121 } 2122 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2123 /* Create the sequential matrix of the same type as the local block diagonal */ 2124 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2125 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2126 PetscCall(MatSetType(B, MATSEQAIJ)); 2127 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2128 b = (Mat_SeqAIJ *)B->data; 2129 2130 /* Copy my part of matrix column indices over */ 2131 sendcount = ad->nz + bd->nz; 2132 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2133 a_jsendbuf = ad->j; 2134 b_jsendbuf = bd->j; 2135 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2136 cnt = 0; 2137 for (i = 0; i < n; i++) { 2138 /* put in lower diagonal portion */ 2139 m = bd->i[i + 1] - bd->i[i]; 2140 while (m > 0) { 2141 /* is it above diagonal (in bd (compressed) numbering) */ 2142 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2143 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2144 m--; 2145 } 2146 2147 /* put in diagonal portion */ 2148 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2149 2150 /* put in upper diagonal portion */ 2151 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2152 } 2153 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2154 2155 /* Gather all column indices to all processors */ 2156 for (i = 0; i < size; i++) { 2157 recvcounts[i] = 0; 2158 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2159 } 2160 displs[0] = 0; 2161 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2162 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2163 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2164 /* set the b->ilen (length of each row) values */ 2165 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2166 /* set the b->i indices */ 2167 b->i[0] = 0; 2168 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2169 PetscCall(PetscFree(lens)); 2170 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2171 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2172 PetscCall(PetscFree(recvcounts)); 2173 2174 PetscCall(MatPropagateSymmetryOptions(A, B)); 2175 *newmat = B; 2176 PetscFunctionReturn(PETSC_SUCCESS); 2177 } 2178 2179 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2180 { 2181 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2182 Vec bb1 = NULL; 2183 2184 PetscFunctionBegin; 2185 if (flag == SOR_APPLY_UPPER) { 2186 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 2190 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2191 2192 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2193 if (flag & SOR_ZERO_INITIAL_GUESS) { 2194 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2195 its--; 2196 } 2197 2198 while (its--) { 2199 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2200 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2201 2202 /* update rhs: bb1 = bb - B*x */ 2203 PetscCall(VecScale(mat->lvec, -1.0)); 2204 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2205 2206 /* local sweep */ 2207 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2208 } 2209 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2210 if (flag & SOR_ZERO_INITIAL_GUESS) { 2211 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2212 its--; 2213 } 2214 while (its--) { 2215 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2216 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2217 2218 /* update rhs: bb1 = bb - B*x */ 2219 PetscCall(VecScale(mat->lvec, -1.0)); 2220 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2221 2222 /* local sweep */ 2223 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2224 } 2225 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2226 if (flag & SOR_ZERO_INITIAL_GUESS) { 2227 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2228 its--; 2229 } 2230 while (its--) { 2231 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2232 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2233 2234 /* update rhs: bb1 = bb - B*x */ 2235 PetscCall(VecScale(mat->lvec, -1.0)); 2236 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2237 2238 /* local sweep */ 2239 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2240 } 2241 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2242 2243 PetscCall(VecDestroy(&bb1)); 2244 PetscFunctionReturn(PETSC_SUCCESS); 2245 } 2246 2247 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2248 { 2249 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2250 PetscInt m, N, i, *garray = aij->garray; 2251 PetscInt ib, jb, bs = A->rmap->bs; 2252 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2253 MatScalar *a_val = a_aij->a; 2254 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2255 MatScalar *b_val = b_aij->a; 2256 PetscReal *work; 2257 2258 PetscFunctionBegin; 2259 PetscCall(MatGetSize(A, &m, &N)); 2260 PetscCall(PetscCalloc1(N, &work)); 2261 if (type == NORM_2) { 2262 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2263 for (jb = 0; jb < bs; jb++) { 2264 for (ib = 0; ib < bs; ib++) { 2265 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2266 a_val++; 2267 } 2268 } 2269 } 2270 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2271 for (jb = 0; jb < bs; jb++) { 2272 for (ib = 0; ib < bs; ib++) { 2273 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2274 b_val++; 2275 } 2276 } 2277 } 2278 } else if (type == NORM_1) { 2279 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2280 for (jb = 0; jb < bs; jb++) { 2281 for (ib = 0; ib < bs; ib++) { 2282 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2283 a_val++; 2284 } 2285 } 2286 } 2287 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2288 for (jb = 0; jb < bs; jb++) { 2289 for (ib = 0; ib < bs; ib++) { 2290 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2291 b_val++; 2292 } 2293 } 2294 } 2295 } else if (type == NORM_INFINITY) { 2296 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2297 for (jb = 0; jb < bs; jb++) { 2298 for (ib = 0; ib < bs; ib++) { 2299 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2300 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2301 a_val++; 2302 } 2303 } 2304 } 2305 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2306 for (jb = 0; jb < bs; jb++) { 2307 for (ib = 0; ib < bs; ib++) { 2308 int col = garray[b_aij->j[i]] * bs + jb; 2309 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2310 b_val++; 2311 } 2312 } 2313 } 2314 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2315 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2316 for (jb = 0; jb < bs; jb++) { 2317 for (ib = 0; ib < bs; ib++) { 2318 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2319 a_val++; 2320 } 2321 } 2322 } 2323 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2324 for (jb = 0; jb < bs; jb++) { 2325 for (ib = 0; ib < bs; ib++) { 2326 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2327 b_val++; 2328 } 2329 } 2330 } 2331 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2332 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2333 for (jb = 0; jb < bs; jb++) { 2334 for (ib = 0; ib < bs; ib++) { 2335 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2336 a_val++; 2337 } 2338 } 2339 } 2340 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2341 for (jb = 0; jb < bs; jb++) { 2342 for (ib = 0; ib < bs; ib++) { 2343 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2344 b_val++; 2345 } 2346 } 2347 } 2348 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2349 if (type == NORM_INFINITY) { 2350 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2351 } else { 2352 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2353 } 2354 PetscCall(PetscFree(work)); 2355 if (type == NORM_2) { 2356 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2357 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2358 for (i = 0; i < N; i++) reductions[i] /= m; 2359 } 2360 PetscFunctionReturn(PETSC_SUCCESS); 2361 } 2362 2363 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2364 { 2365 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2366 2367 PetscFunctionBegin; 2368 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2369 A->factorerrortype = a->A->factorerrortype; 2370 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2371 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2372 PetscFunctionReturn(PETSC_SUCCESS); 2373 } 2374 2375 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2376 { 2377 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2378 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2379 2380 PetscFunctionBegin; 2381 if (!Y->preallocated) { 2382 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2383 } else if (!aij->nz) { 2384 PetscInt nonew = aij->nonew; 2385 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2386 aij->nonew = nonew; 2387 } 2388 PetscCall(MatShift_Basic(Y, a)); 2389 PetscFunctionReturn(PETSC_SUCCESS); 2390 } 2391 2392 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2393 { 2394 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2395 2396 PetscFunctionBegin; 2397 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2398 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2399 if (d) { 2400 PetscInt rstart; 2401 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2402 *d += rstart / A->rmap->bs; 2403 } 2404 PetscFunctionReturn(PETSC_SUCCESS); 2405 } 2406 2407 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2408 { 2409 PetscFunctionBegin; 2410 *a = ((Mat_MPIBAIJ *)A->data)->A; 2411 PetscFunctionReturn(PETSC_SUCCESS); 2412 } 2413 2414 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2415 { 2416 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2417 2418 PetscFunctionBegin; 2419 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2420 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2425 MatGetRow_MPIBAIJ, 2426 MatRestoreRow_MPIBAIJ, 2427 MatMult_MPIBAIJ, 2428 /* 4*/ MatMultAdd_MPIBAIJ, 2429 MatMultTranspose_MPIBAIJ, 2430 MatMultTransposeAdd_MPIBAIJ, 2431 NULL, 2432 NULL, 2433 NULL, 2434 /*10*/ NULL, 2435 NULL, 2436 NULL, 2437 MatSOR_MPIBAIJ, 2438 MatTranspose_MPIBAIJ, 2439 /*15*/ MatGetInfo_MPIBAIJ, 2440 MatEqual_MPIBAIJ, 2441 MatGetDiagonal_MPIBAIJ, 2442 MatDiagonalScale_MPIBAIJ, 2443 MatNorm_MPIBAIJ, 2444 /*20*/ MatAssemblyBegin_MPIBAIJ, 2445 MatAssemblyEnd_MPIBAIJ, 2446 MatSetOption_MPIBAIJ, 2447 MatZeroEntries_MPIBAIJ, 2448 /*24*/ MatZeroRows_MPIBAIJ, 2449 NULL, 2450 NULL, 2451 NULL, 2452 NULL, 2453 /*29*/ MatSetUp_MPI_Hash, 2454 NULL, 2455 NULL, 2456 MatGetDiagonalBlock_MPIBAIJ, 2457 NULL, 2458 /*34*/ MatDuplicate_MPIBAIJ, 2459 NULL, 2460 NULL, 2461 NULL, 2462 NULL, 2463 /*39*/ MatAXPY_MPIBAIJ, 2464 MatCreateSubMatrices_MPIBAIJ, 2465 MatIncreaseOverlap_MPIBAIJ, 2466 MatGetValues_MPIBAIJ, 2467 MatCopy_MPIBAIJ, 2468 /*44*/ NULL, 2469 MatScale_MPIBAIJ, 2470 MatShift_MPIBAIJ, 2471 NULL, 2472 MatZeroRowsColumns_MPIBAIJ, 2473 /*49*/ NULL, 2474 NULL, 2475 NULL, 2476 NULL, 2477 NULL, 2478 /*54*/ MatFDColoringCreate_MPIXAIJ, 2479 NULL, 2480 MatSetUnfactored_MPIBAIJ, 2481 MatPermute_MPIBAIJ, 2482 MatSetValuesBlocked_MPIBAIJ, 2483 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2484 MatDestroy_MPIBAIJ, 2485 MatView_MPIBAIJ, 2486 NULL, 2487 NULL, 2488 /*64*/ NULL, 2489 NULL, 2490 NULL, 2491 NULL, 2492 NULL, 2493 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2494 NULL, 2495 NULL, 2496 NULL, 2497 NULL, 2498 /*74*/ NULL, 2499 MatFDColoringApply_BAIJ, 2500 NULL, 2501 NULL, 2502 NULL, 2503 /*79*/ NULL, 2504 NULL, 2505 NULL, 2506 NULL, 2507 MatLoad_MPIBAIJ, 2508 /*84*/ NULL, 2509 NULL, 2510 NULL, 2511 NULL, 2512 NULL, 2513 /*89*/ NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 /*94*/ NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 /*99*/ NULL, 2524 NULL, 2525 NULL, 2526 MatConjugate_MPIBAIJ, 2527 NULL, 2528 /*104*/ NULL, 2529 MatRealPart_MPIBAIJ, 2530 MatImaginaryPart_MPIBAIJ, 2531 NULL, 2532 NULL, 2533 /*109*/ NULL, 2534 NULL, 2535 NULL, 2536 NULL, 2537 MatMissingDiagonal_MPIBAIJ, 2538 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2539 NULL, 2540 MatGetGhosts_MPIBAIJ, 2541 NULL, 2542 NULL, 2543 /*119*/ NULL, 2544 NULL, 2545 NULL, 2546 NULL, 2547 MatGetMultiProcBlock_MPIBAIJ, 2548 /*124*/ NULL, 2549 MatGetColumnReductions_MPIBAIJ, 2550 MatInvertBlockDiagonal_MPIBAIJ, 2551 NULL, 2552 NULL, 2553 /*129*/ NULL, 2554 NULL, 2555 NULL, 2556 NULL, 2557 NULL, 2558 /*134*/ NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 /*139*/ MatSetBlockSizes_Default, 2564 NULL, 2565 NULL, 2566 MatFDColoringSetUp_MPIXAIJ, 2567 NULL, 2568 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2569 NULL, 2570 NULL, 2571 NULL, 2572 NULL, 2573 NULL, 2574 /*150*/ NULL, 2575 MatEliminateZeros_MPIBAIJ, 2576 NULL}; 2577 2578 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2579 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2580 2581 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2582 { 2583 PetscInt m, rstart, cstart, cend; 2584 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2585 const PetscInt *JJ = NULL; 2586 PetscScalar *values = NULL; 2587 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2588 PetscBool nooffprocentries; 2589 2590 PetscFunctionBegin; 2591 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2592 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2593 PetscCall(PetscLayoutSetUp(B->rmap)); 2594 PetscCall(PetscLayoutSetUp(B->cmap)); 2595 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2596 m = B->rmap->n / bs; 2597 rstart = B->rmap->rstart / bs; 2598 cstart = B->cmap->rstart / bs; 2599 cend = B->cmap->rend / bs; 2600 2601 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2602 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2603 for (i = 0; i < m; i++) { 2604 nz = ii[i + 1] - ii[i]; 2605 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2606 nz_max = PetscMax(nz_max, nz); 2607 dlen = 0; 2608 olen = 0; 2609 JJ = jj + ii[i]; 2610 for (j = 0; j < nz; j++) { 2611 if (*JJ < cstart || *JJ >= cend) olen++; 2612 else dlen++; 2613 JJ++; 2614 } 2615 d_nnz[i] = dlen; 2616 o_nnz[i] = olen; 2617 } 2618 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2619 PetscCall(PetscFree2(d_nnz, o_nnz)); 2620 2621 values = (PetscScalar *)V; 2622 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2623 for (i = 0; i < m; i++) { 2624 PetscInt row = i + rstart; 2625 PetscInt ncols = ii[i + 1] - ii[i]; 2626 const PetscInt *icols = jj + ii[i]; 2627 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2628 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2629 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2630 } else { /* block ordering does not match so we can only insert one block at a time. */ 2631 PetscInt j; 2632 for (j = 0; j < ncols; j++) { 2633 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2634 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2635 } 2636 } 2637 } 2638 2639 if (!V) PetscCall(PetscFree(values)); 2640 nooffprocentries = B->nooffprocentries; 2641 B->nooffprocentries = PETSC_TRUE; 2642 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2643 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2644 B->nooffprocentries = nooffprocentries; 2645 2646 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 /*@C 2651 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2652 2653 Collective 2654 2655 Input Parameters: 2656 + B - the matrix 2657 . bs - the block size 2658 . i - the indices into `j` for the start of each local row (starts with zero) 2659 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2660 - v - optional values in the matrix, use `NULL` if not provided 2661 2662 Level: advanced 2663 2664 Notes: 2665 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2666 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2667 called this routine. 2668 2669 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2670 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2671 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2672 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2673 block column and the second index is over columns within a block. 2674 2675 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2676 2677 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2678 @*/ 2679 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2680 { 2681 PetscFunctionBegin; 2682 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2683 PetscValidType(B, 1); 2684 PetscValidLogicalCollectiveInt(B, bs, 2); 2685 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2686 PetscFunctionReturn(PETSC_SUCCESS); 2687 } 2688 2689 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2690 { 2691 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2692 PetscInt i; 2693 PetscMPIInt size; 2694 2695 PetscFunctionBegin; 2696 if (B->hash_active) { 2697 B->ops[0] = b->cops; 2698 B->hash_active = PETSC_FALSE; 2699 } 2700 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2701 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2702 PetscCall(PetscLayoutSetUp(B->rmap)); 2703 PetscCall(PetscLayoutSetUp(B->cmap)); 2704 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2705 2706 if (d_nnz) { 2707 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2708 } 2709 if (o_nnz) { 2710 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2711 } 2712 2713 b->bs2 = bs * bs; 2714 b->mbs = B->rmap->n / bs; 2715 b->nbs = B->cmap->n / bs; 2716 b->Mbs = B->rmap->N / bs; 2717 b->Nbs = B->cmap->N / bs; 2718 2719 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2720 b->rstartbs = B->rmap->rstart / bs; 2721 b->rendbs = B->rmap->rend / bs; 2722 b->cstartbs = B->cmap->rstart / bs; 2723 b->cendbs = B->cmap->rend / bs; 2724 2725 #if defined(PETSC_USE_CTABLE) 2726 PetscCall(PetscHMapIDestroy(&b->colmap)); 2727 #else 2728 PetscCall(PetscFree(b->colmap)); 2729 #endif 2730 PetscCall(PetscFree(b->garray)); 2731 PetscCall(VecDestroy(&b->lvec)); 2732 PetscCall(VecScatterDestroy(&b->Mvctx)); 2733 2734 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2735 2736 MatSeqXAIJGetOptions_Private(b->B); 2737 PetscCall(MatDestroy(&b->B)); 2738 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2739 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2740 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2741 MatSeqXAIJRestoreOptions_Private(b->B); 2742 2743 MatSeqXAIJGetOptions_Private(b->A); 2744 PetscCall(MatDestroy(&b->A)); 2745 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2746 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2747 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2748 MatSeqXAIJRestoreOptions_Private(b->A); 2749 2750 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2751 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2752 B->preallocated = PETSC_TRUE; 2753 B->was_assembled = PETSC_FALSE; 2754 B->assembled = PETSC_FALSE; 2755 PetscFunctionReturn(PETSC_SUCCESS); 2756 } 2757 2758 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2759 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2760 2761 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2762 { 2763 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2764 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2765 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2766 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2767 2768 PetscFunctionBegin; 2769 PetscCall(PetscMalloc1(M + 1, &ii)); 2770 ii[0] = 0; 2771 for (i = 0; i < M; i++) { 2772 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2773 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2774 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2775 /* remove one from count of matrix has diagonal */ 2776 for (j = id[i]; j < id[i + 1]; j++) { 2777 if (jd[j] == i) { 2778 ii[i + 1]--; 2779 break; 2780 } 2781 } 2782 } 2783 PetscCall(PetscMalloc1(ii[M], &jj)); 2784 cnt = 0; 2785 for (i = 0; i < M; i++) { 2786 for (j = io[i]; j < io[i + 1]; j++) { 2787 if (garray[jo[j]] > rstart) break; 2788 jj[cnt++] = garray[jo[j]]; 2789 } 2790 for (k = id[i]; k < id[i + 1]; k++) { 2791 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2792 } 2793 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2794 } 2795 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2796 PetscFunctionReturn(PETSC_SUCCESS); 2797 } 2798 2799 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2800 2801 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2802 2803 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2804 { 2805 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2806 Mat_MPIAIJ *b; 2807 Mat B; 2808 2809 PetscFunctionBegin; 2810 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2811 2812 if (reuse == MAT_REUSE_MATRIX) { 2813 B = *newmat; 2814 } else { 2815 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2816 PetscCall(MatSetType(B, MATMPIAIJ)); 2817 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2818 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2819 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2820 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2821 } 2822 b = (Mat_MPIAIJ *)B->data; 2823 2824 if (reuse == MAT_REUSE_MATRIX) { 2825 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2826 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2827 } else { 2828 PetscInt *garray = a->garray; 2829 Mat_SeqAIJ *bB; 2830 PetscInt bs, nnz; 2831 PetscCall(MatDestroy(&b->A)); 2832 PetscCall(MatDestroy(&b->B)); 2833 /* just clear out the data structure */ 2834 PetscCall(MatDisAssemble_MPIAIJ(B)); 2835 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2836 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2837 2838 /* Global numbering for b->B columns */ 2839 bB = (Mat_SeqAIJ *)b->B->data; 2840 bs = A->rmap->bs; 2841 nnz = bB->i[A->rmap->n]; 2842 for (PetscInt k = 0; k < nnz; k++) { 2843 PetscInt bj = bB->j[k] / bs; 2844 PetscInt br = bB->j[k] % bs; 2845 bB->j[k] = garray[bj] * bs + br; 2846 } 2847 } 2848 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2849 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2850 2851 if (reuse == MAT_INPLACE_MATRIX) { 2852 PetscCall(MatHeaderReplace(A, &B)); 2853 } else { 2854 *newmat = B; 2855 } 2856 PetscFunctionReturn(PETSC_SUCCESS); 2857 } 2858 2859 /*MC 2860 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2861 2862 Options Database Keys: 2863 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2864 . -mat_block_size <bs> - set the blocksize used to store the matrix 2865 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2866 - -mat_use_hash_table <fact> - set hash table factor 2867 2868 Level: beginner 2869 2870 Note: 2871 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2872 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2873 2874 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2875 M*/ 2876 2877 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2878 2879 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2880 { 2881 Mat_MPIBAIJ *b; 2882 PetscBool flg = PETSC_FALSE; 2883 2884 PetscFunctionBegin; 2885 PetscCall(PetscNew(&b)); 2886 B->data = (void *)b; 2887 B->ops[0] = MatOps_Values; 2888 B->assembled = PETSC_FALSE; 2889 2890 B->insertmode = NOT_SET_VALUES; 2891 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2892 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2893 2894 /* build local table of row and column ownerships */ 2895 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2896 2897 /* build cache for off array entries formed */ 2898 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2899 2900 b->donotstash = PETSC_FALSE; 2901 b->colmap = NULL; 2902 b->garray = NULL; 2903 b->roworiented = PETSC_TRUE; 2904 2905 /* stuff used in block assembly */ 2906 b->barray = NULL; 2907 2908 /* stuff used for matrix vector multiply */ 2909 b->lvec = NULL; 2910 b->Mvctx = NULL; 2911 2912 /* stuff for MatGetRow() */ 2913 b->rowindices = NULL; 2914 b->rowvalues = NULL; 2915 b->getrowactive = PETSC_FALSE; 2916 2917 /* hash table stuff */ 2918 b->ht = NULL; 2919 b->hd = NULL; 2920 b->ht_size = 0; 2921 b->ht_flag = PETSC_FALSE; 2922 b->ht_fact = 0; 2923 b->ht_total_ct = 0; 2924 b->ht_insert_ct = 0; 2925 2926 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2927 b->ijonly = PETSC_FALSE; 2928 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2932 #if defined(PETSC_HAVE_HYPRE) 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2934 #endif 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2942 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2943 2944 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2945 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2946 if (flg) { 2947 PetscReal fact = 1.39; 2948 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2949 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2950 if (fact <= 1.0) fact = 1.39; 2951 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2952 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2953 } 2954 PetscOptionsEnd(); 2955 PetscFunctionReturn(PETSC_SUCCESS); 2956 } 2957 2958 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2959 /*MC 2960 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2961 2962 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2963 and `MATMPIBAIJ` otherwise. 2964 2965 Options Database Keys: 2966 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2967 2968 Level: beginner 2969 2970 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2971 M*/ 2972 2973 /*@C 2974 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2975 (block compressed row). 2976 2977 Collective 2978 2979 Input Parameters: 2980 + B - the matrix 2981 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2982 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2983 . d_nz - number of block nonzeros per block row in diagonal portion of local 2984 submatrix (same for all local rows) 2985 . d_nnz - array containing the number of block nonzeros in the various block rows 2986 of the in diagonal portion of the local (possibly different for each block 2987 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2988 set it even if it is zero. 2989 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2990 submatrix (same for all local rows). 2991 - o_nnz - array containing the number of nonzeros in the various block rows of the 2992 off-diagonal portion of the local submatrix (possibly different for 2993 each block row) or `NULL`. 2994 2995 If the *_nnz parameter is given then the *_nz parameter is ignored 2996 2997 Options Database Keys: 2998 + -mat_block_size - size of the blocks to use 2999 - -mat_use_hash_table <fact> - set hash table factor 3000 3001 Level: intermediate 3002 3003 Notes: 3004 For good matrix assembly performance 3005 the user should preallocate the matrix storage by setting the parameters 3006 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3007 performance can be increased by more than a factor of 50. 3008 3009 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3010 than it must be used on all processors that share the object for that argument. 3011 3012 Storage Information: 3013 For a square global matrix we define each processor's diagonal portion 3014 to be its local rows and the corresponding columns (a square submatrix); 3015 each processor's off-diagonal portion encompasses the remainder of the 3016 local matrix (a rectangular submatrix). 3017 3018 The user can specify preallocated storage for the diagonal part of 3019 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3020 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3021 memory allocation. Likewise, specify preallocated storage for the 3022 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3023 3024 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3025 the figure below we depict these three local rows and all columns (0-11). 3026 3027 .vb 3028 0 1 2 3 4 5 6 7 8 9 10 11 3029 -------------------------- 3030 row 3 |o o o d d d o o o o o o 3031 row 4 |o o o d d d o o o o o o 3032 row 5 |o o o d d d o o o o o o 3033 -------------------------- 3034 .ve 3035 3036 Thus, any entries in the d locations are stored in the d (diagonal) 3037 submatrix, and any entries in the o locations are stored in the 3038 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3039 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3040 3041 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3042 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3043 In general, for PDE problems in which most nonzeros are near the diagonal, 3044 one expects `d_nz` >> `o_nz`. 3045 3046 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3047 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3048 You can also run with the option `-info` and look for messages with the string 3049 malloc in them to see if additional memory allocation was needed. 3050 3051 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3052 @*/ 3053 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3054 { 3055 PetscFunctionBegin; 3056 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3057 PetscValidType(B, 1); 3058 PetscValidLogicalCollectiveInt(B, bs, 2); 3059 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3060 PetscFunctionReturn(PETSC_SUCCESS); 3061 } 3062 3063 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3064 /*@C 3065 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3066 (block compressed row). 3067 3068 Collective 3069 3070 Input Parameters: 3071 + comm - MPI communicator 3072 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3073 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3074 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3075 This value should be the same as the local size used in creating the 3076 y vector for the matrix-vector product y = Ax. 3077 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3078 This value should be the same as the local size used in creating the 3079 x vector for the matrix-vector product y = Ax. 3080 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3081 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3082 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3083 submatrix (same for all local rows) 3084 . d_nnz - array containing the number of nonzero blocks in the various block rows 3085 of the in diagonal portion of the local (possibly different for each block 3086 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3087 and set it even if it is zero. 3088 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3089 submatrix (same for all local rows). 3090 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3091 off-diagonal portion of the local submatrix (possibly different for 3092 each block row) or NULL. 3093 3094 Output Parameter: 3095 . A - the matrix 3096 3097 Options Database Keys: 3098 + -mat_block_size - size of the blocks to use 3099 - -mat_use_hash_table <fact> - set hash table factor 3100 3101 Level: intermediate 3102 3103 Notes: 3104 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3105 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3106 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3107 3108 For good matrix assembly performance 3109 the user should preallocate the matrix storage by setting the parameters 3110 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3111 performance can be increased by more than a factor of 50. 3112 3113 If the *_nnz parameter is given then the *_nz parameter is ignored 3114 3115 A nonzero block is any block that as 1 or more nonzeros in it 3116 3117 The user MUST specify either the local or global matrix dimensions 3118 (possibly both). 3119 3120 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3121 than it must be used on all processors that share the object for that argument. 3122 3123 Storage Information: 3124 For a square global matrix we define each processor's diagonal portion 3125 to be its local rows and the corresponding columns (a square submatrix); 3126 each processor's off-diagonal portion encompasses the remainder of the 3127 local matrix (a rectangular submatrix). 3128 3129 The user can specify preallocated storage for the diagonal part of 3130 the local submatrix with either d_nz or d_nnz (not both). Set 3131 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3132 memory allocation. Likewise, specify preallocated storage for the 3133 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3134 3135 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3136 the figure below we depict these three local rows and all columns (0-11). 3137 3138 .vb 3139 0 1 2 3 4 5 6 7 8 9 10 11 3140 -------------------------- 3141 row 3 |o o o d d d o o o o o o 3142 row 4 |o o o d d d o o o o o o 3143 row 5 |o o o d d d o o o o o o 3144 -------------------------- 3145 .ve 3146 3147 Thus, any entries in the d locations are stored in the d (diagonal) 3148 submatrix, and any entries in the o locations are stored in the 3149 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3150 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3151 3152 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3153 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3154 In general, for PDE problems in which most nonzeros are near the diagonal, 3155 one expects `d_nz` >> `o_nz`. 3156 3157 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3158 @*/ 3159 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3160 { 3161 PetscMPIInt size; 3162 3163 PetscFunctionBegin; 3164 PetscCall(MatCreate(comm, A)); 3165 PetscCall(MatSetSizes(*A, m, n, M, N)); 3166 PetscCallMPI(MPI_Comm_size(comm, &size)); 3167 if (size > 1) { 3168 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3169 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3170 } else { 3171 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3172 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3173 } 3174 PetscFunctionReturn(PETSC_SUCCESS); 3175 } 3176 3177 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3178 { 3179 Mat mat; 3180 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3181 PetscInt len = 0; 3182 3183 PetscFunctionBegin; 3184 *newmat = NULL; 3185 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3186 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3187 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3188 3189 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3190 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3191 if (matin->hash_active) { 3192 PetscCall(MatSetUp(mat)); 3193 } else { 3194 mat->factortype = matin->factortype; 3195 mat->preallocated = PETSC_TRUE; 3196 mat->assembled = PETSC_TRUE; 3197 mat->insertmode = NOT_SET_VALUES; 3198 3199 a = (Mat_MPIBAIJ *)mat->data; 3200 mat->rmap->bs = matin->rmap->bs; 3201 a->bs2 = oldmat->bs2; 3202 a->mbs = oldmat->mbs; 3203 a->nbs = oldmat->nbs; 3204 a->Mbs = oldmat->Mbs; 3205 a->Nbs = oldmat->Nbs; 3206 3207 a->size = oldmat->size; 3208 a->rank = oldmat->rank; 3209 a->donotstash = oldmat->donotstash; 3210 a->roworiented = oldmat->roworiented; 3211 a->rowindices = NULL; 3212 a->rowvalues = NULL; 3213 a->getrowactive = PETSC_FALSE; 3214 a->barray = NULL; 3215 a->rstartbs = oldmat->rstartbs; 3216 a->rendbs = oldmat->rendbs; 3217 a->cstartbs = oldmat->cstartbs; 3218 a->cendbs = oldmat->cendbs; 3219 3220 /* hash table stuff */ 3221 a->ht = NULL; 3222 a->hd = NULL; 3223 a->ht_size = 0; 3224 a->ht_flag = oldmat->ht_flag; 3225 a->ht_fact = oldmat->ht_fact; 3226 a->ht_total_ct = 0; 3227 a->ht_insert_ct = 0; 3228 3229 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3230 if (oldmat->colmap) { 3231 #if defined(PETSC_USE_CTABLE) 3232 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3233 #else 3234 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3235 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3236 #endif 3237 } else a->colmap = NULL; 3238 3239 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3240 PetscCall(PetscMalloc1(len, &a->garray)); 3241 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3242 } else a->garray = NULL; 3243 3244 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3245 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3246 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3247 3248 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3249 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3250 } 3251 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3252 *newmat = mat; 3253 PetscFunctionReturn(PETSC_SUCCESS); 3254 } 3255 3256 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3257 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3258 { 3259 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3260 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3261 PetscScalar *matvals; 3262 3263 PetscFunctionBegin; 3264 PetscCall(PetscViewerSetUp(viewer)); 3265 3266 /* read in matrix header */ 3267 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3268 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3269 M = header[1]; 3270 N = header[2]; 3271 nz = header[3]; 3272 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3273 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3274 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3275 3276 /* set block sizes from the viewer's .info file */ 3277 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3278 /* set local sizes if not set already */ 3279 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3280 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3281 /* set global sizes if not set already */ 3282 if (mat->rmap->N < 0) mat->rmap->N = M; 3283 if (mat->cmap->N < 0) mat->cmap->N = N; 3284 PetscCall(PetscLayoutSetUp(mat->rmap)); 3285 PetscCall(PetscLayoutSetUp(mat->cmap)); 3286 3287 /* check if the matrix sizes are correct */ 3288 PetscCall(MatGetSize(mat, &rows, &cols)); 3289 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3290 PetscCall(MatGetBlockSize(mat, &bs)); 3291 PetscCall(MatGetLocalSize(mat, &m, &n)); 3292 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3293 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3294 mbs = m / bs; 3295 nbs = n / bs; 3296 3297 /* read in row lengths and build row indices */ 3298 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3299 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3300 rowidxs[0] = 0; 3301 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3302 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3303 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3304 3305 /* read in column indices and matrix values */ 3306 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3307 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3308 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3309 3310 { /* preallocate matrix storage */ 3311 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3312 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3313 PetscBool sbaij, done; 3314 PetscInt *d_nnz, *o_nnz; 3315 3316 PetscCall(PetscBTCreate(nbs, &bt)); 3317 PetscCall(PetscHSetICreate(&ht)); 3318 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3319 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3320 for (i = 0; i < mbs; i++) { 3321 PetscCall(PetscBTMemzero(nbs, bt)); 3322 PetscCall(PetscHSetIClear(ht)); 3323 for (k = 0; k < bs; k++) { 3324 PetscInt row = bs * i + k; 3325 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3326 PetscInt col = colidxs[j]; 3327 if (!sbaij || col >= row) { 3328 if (col >= cs && col < ce) { 3329 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3330 } else { 3331 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3332 if (done) o_nnz[i]++; 3333 } 3334 } 3335 } 3336 } 3337 } 3338 PetscCall(PetscBTDestroy(&bt)); 3339 PetscCall(PetscHSetIDestroy(&ht)); 3340 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3341 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3342 PetscCall(PetscFree2(d_nnz, o_nnz)); 3343 } 3344 3345 /* store matrix values */ 3346 for (i = 0; i < m; i++) { 3347 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3348 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3349 } 3350 3351 PetscCall(PetscFree(rowidxs)); 3352 PetscCall(PetscFree2(colidxs, matvals)); 3353 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3354 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3355 PetscFunctionReturn(PETSC_SUCCESS); 3356 } 3357 3358 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3359 { 3360 PetscBool isbinary; 3361 3362 PetscFunctionBegin; 3363 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3364 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3365 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368 3369 /*@ 3370 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3371 3372 Input Parameters: 3373 + mat - the matrix 3374 - fact - factor 3375 3376 Options Database Key: 3377 . -mat_use_hash_table <fact> - provide the factor 3378 3379 Level: advanced 3380 3381 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3382 @*/ 3383 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3384 { 3385 PetscFunctionBegin; 3386 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3387 PetscFunctionReturn(PETSC_SUCCESS); 3388 } 3389 3390 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3391 { 3392 Mat_MPIBAIJ *baij; 3393 3394 PetscFunctionBegin; 3395 baij = (Mat_MPIBAIJ *)mat->data; 3396 baij->ht_fact = fact; 3397 PetscFunctionReturn(PETSC_SUCCESS); 3398 } 3399 3400 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3401 { 3402 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3403 PetscBool flg; 3404 3405 PetscFunctionBegin; 3406 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3407 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3408 if (Ad) *Ad = a->A; 3409 if (Ao) *Ao = a->B; 3410 if (colmap) *colmap = a->garray; 3411 PetscFunctionReturn(PETSC_SUCCESS); 3412 } 3413 3414 /* 3415 Special version for direct calls from Fortran (to eliminate two function call overheads 3416 */ 3417 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3418 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3419 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3420 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3421 #endif 3422 3423 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3424 /*@C 3425 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3426 3427 Collective 3428 3429 Input Parameters: 3430 + matin - the matrix 3431 . min - number of input rows 3432 . im - input rows 3433 . nin - number of input columns 3434 . in - input columns 3435 . v - numerical values input 3436 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3437 3438 Level: advanced 3439 3440 Developer Notes: 3441 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3442 3443 .seealso: `Mat`, `MatSetValuesBlocked()` 3444 @*/ 3445 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3446 { 3447 /* convert input arguments to C version */ 3448 Mat mat = *matin; 3449 PetscInt m = *min, n = *nin; 3450 InsertMode addv = *addvin; 3451 3452 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3453 const MatScalar *value; 3454 MatScalar *barray = baij->barray; 3455 PetscBool roworiented = baij->roworiented; 3456 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3457 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3458 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3459 3460 PetscFunctionBegin; 3461 /* tasks normally handled by MatSetValuesBlocked() */ 3462 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3463 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3464 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3465 if (mat->assembled) { 3466 mat->was_assembled = PETSC_TRUE; 3467 mat->assembled = PETSC_FALSE; 3468 } 3469 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3470 3471 if (!barray) { 3472 PetscCall(PetscMalloc1(bs2, &barray)); 3473 baij->barray = barray; 3474 } 3475 3476 if (roworiented) stepval = (n - 1) * bs; 3477 else stepval = (m - 1) * bs; 3478 3479 for (i = 0; i < m; i++) { 3480 if (im[i] < 0) continue; 3481 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3482 if (im[i] >= rstart && im[i] < rend) { 3483 row = im[i] - rstart; 3484 for (j = 0; j < n; j++) { 3485 /* If NumCol = 1 then a copy is not required */ 3486 if ((roworiented) && (n == 1)) { 3487 barray = (MatScalar *)v + i * bs2; 3488 } else if ((!roworiented) && (m == 1)) { 3489 barray = (MatScalar *)v + j * bs2; 3490 } else { /* Here a copy is required */ 3491 if (roworiented) { 3492 value = v + i * (stepval + bs) * bs + j * bs; 3493 } else { 3494 value = v + j * (stepval + bs) * bs + i * bs; 3495 } 3496 for (ii = 0; ii < bs; ii++, value += stepval) { 3497 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3498 } 3499 barray -= bs2; 3500 } 3501 3502 if (in[j] >= cstart && in[j] < cend) { 3503 col = in[j] - cstart; 3504 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3505 } else if (in[j] < 0) { 3506 continue; 3507 } else { 3508 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3509 if (mat->was_assembled) { 3510 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3511 3512 #if defined(PETSC_USE_DEBUG) 3513 #if defined(PETSC_USE_CTABLE) 3514 { 3515 PetscInt data; 3516 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3517 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3518 } 3519 #else 3520 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3521 #endif 3522 #endif 3523 #if defined(PETSC_USE_CTABLE) 3524 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3525 col = (col - 1) / bs; 3526 #else 3527 col = (baij->colmap[in[j]] - 1) / bs; 3528 #endif 3529 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3530 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3531 col = in[j]; 3532 } 3533 } else col = in[j]; 3534 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3535 } 3536 } 3537 } else { 3538 if (!baij->donotstash) { 3539 if (roworiented) { 3540 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3541 } else { 3542 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3543 } 3544 } 3545 } 3546 } 3547 3548 /* task normally handled by MatSetValuesBlocked() */ 3549 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3550 PetscFunctionReturn(PETSC_SUCCESS); 3551 } 3552 3553 /*@ 3554 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3555 3556 Collective 3557 3558 Input Parameters: 3559 + comm - MPI communicator 3560 . bs - the block size, only a block size of 1 is supported 3561 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3562 . n - This value should be the same as the local size used in creating the 3563 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3564 calculated if `N` is given) For square matrices `n` is almost always `m`. 3565 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3566 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3567 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3568 . j - column indices 3569 - a - matrix values 3570 3571 Output Parameter: 3572 . mat - the matrix 3573 3574 Level: intermediate 3575 3576 Notes: 3577 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3578 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3579 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3580 3581 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3582 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3583 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3584 with column-major ordering within blocks. 3585 3586 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3587 3588 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3589 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3590 @*/ 3591 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3592 { 3593 PetscFunctionBegin; 3594 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3595 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3596 PetscCall(MatCreate(comm, mat)); 3597 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3598 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3599 PetscCall(MatSetBlockSize(*mat, bs)); 3600 PetscCall(MatSetUp(*mat)); 3601 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3602 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3603 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3604 PetscFunctionReturn(PETSC_SUCCESS); 3605 } 3606 3607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3608 { 3609 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3610 PetscInt *indx; 3611 PetscScalar *values; 3612 3613 PetscFunctionBegin; 3614 PetscCall(MatGetSize(inmat, &m, &N)); 3615 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3616 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3617 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3618 PetscInt *bindx, rmax = a->rmax, j; 3619 PetscMPIInt rank, size; 3620 3621 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3622 mbs = m / bs; 3623 Nbs = N / cbs; 3624 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3625 nbs = n / cbs; 3626 3627 PetscCall(PetscMalloc1(rmax, &bindx)); 3628 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3629 3630 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3631 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3632 if (rank == size - 1) { 3633 /* Check sum(nbs) = Nbs */ 3634 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3635 } 3636 3637 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3638 for (i = 0; i < mbs; i++) { 3639 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3640 nnz = nnz / bs; 3641 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3642 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3643 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3644 } 3645 PetscCall(PetscFree(bindx)); 3646 3647 PetscCall(MatCreate(comm, outmat)); 3648 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3649 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3650 PetscCall(MatSetType(*outmat, MATBAIJ)); 3651 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3652 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3653 MatPreallocateEnd(dnz, onz); 3654 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3655 } 3656 3657 /* numeric phase */ 3658 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3659 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3660 3661 for (i = 0; i < m; i++) { 3662 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3663 Ii = i + rstart; 3664 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3665 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3666 } 3667 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3668 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3669 PetscFunctionReturn(PETSC_SUCCESS); 3670 } 3671