#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)A->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)B->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

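  /* stepval is the extra stride, in scalars, between consecutive rows (roworiented) or columns
     (!roworiented) of a single bs x bs block within the caller's dense input array v */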
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
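  /* HT[slot] holds the 1-based key (block_row * Nbs + block_col + 1) and HD[slot] points at the
     corresponding bs*bs block of values; both are built in MatCreateHashTable_MPIBAIJ_Private() */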
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
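        /* linear probe from h1 for the key, wrapping to the start of the table; in debug builds the probes are also counted */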
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
    row = idxm[i] - bsrstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= bscstart && idxn[j] < bscend) {
        col = idxn[j] - bscstart;
        PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
        data--;
#else
        data = baij->colmap[idxn[j] / bs] - 1;
#endif
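        /* data is the first local scalar column of the requested block column in B (taken from colmap),
           or negative if that block column is not stored here; entries not stored locally are returned as 0.0 */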
        if (data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0;
        else {
          col = data + idxn[j] % bs;
          PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      Vec          col;
      PetscScalar *array;
      PetscInt    *jj, *garray = baij->garray;

      PetscCall(MatCreateVecs(mat, &col, NULL));
      PetscCall(VecSet(col, 0.0));
      PetscCall(VecGetArrayWrite(col, &array));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscInt col = bs * *jj + j; /* column index */

          for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++);
        }
        jj++;
      }
      PetscCall(VecRestoreArrayWrite(col, &array));
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          PetscScalar tmp = 0.0;

          for (row = 0; row < bs; row++) tmp += PetscAbsScalar(*v++);
          PetscCall(VecSetValue(col, bs * garray[*jj] + j, tmp, ADD_VALUES));
        }
        jj++;
      }
      PetscCall(VecAssemblyBegin(col));
      PetscCall(VecAssemblyEnd(col));
      PetscCall(VecNorm(col, NORM_INFINITY, nrm));
      PetscCall(VecDestroy(&col));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates the hash table, and sets the table
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
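   The table uses open addressing with linear probing: each slot of ht holds the key
   (block_row*Nbs + block_col + 1) and the matching slot of hd points at that block's values.
   It has factor*nz slots, where factor is baij->ht_fact (set to 1.39 when MAT_USE_HASH_TABLE
   is enabled and adjustable with MatSetHashTableFactor()).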
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
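  /* j is the number of occupied slots and ct the number of extra probes caused by collisions,
     so (ct + j)/j is the average number of probes needed to reach a stored key */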
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, all_assembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         isascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (isascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1506 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1507 MatScalar *a; 1508 1509 PetscFunctionBegin; 1510 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1511 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1512 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1513 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1514 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1515 /* Do not know preallocation information, but must set block size */ 1516 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1517 } else { 1518 B = *matout; 1519 } 1520 1521 /* copy over the A part */ 1522 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1523 ai = Aloc->i; 1524 aj = Aloc->j; 1525 a = Aloc->a; 1526 PetscCall(PetscMalloc1(bs, &rvals)); 1527 1528 for (i = 0; i < mbs; i++) { 1529 rvals[0] = bs * (baij->rstartbs + i); 1530 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1531 for (j = ai[i]; j < ai[i + 1]; j++) { 1532 col = (baij->cstartbs + aj[j]) * bs; 1533 for (k = 0; k < bs; k++) { 1534 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1535 1536 col++; 1537 a += bs; 1538 } 1539 } 1540 } 1541 /* copy over the B part */ 1542 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1543 ai = Aloc->i; 1544 aj = Aloc->j; 1545 a = Aloc->a; 1546 for (i = 0; i < mbs; i++) { 1547 rvals[0] = bs * (baij->rstartbs + i); 1548 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1549 for (j = ai[i]; j < ai[i + 1]; j++) { 1550 col = baij->garray[aj[j]] * bs; 1551 for (k = 0; k < bs; k++) { 1552 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1553 col++; 1554 a += bs; 1555 } 1556 } 1557 } 1558 PetscCall(PetscFree(rvals)); 1559 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1560 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1561 1562 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1563 else PetscCall(MatHeaderMerge(A, &B)); 1564 PetscFunctionReturn(PETSC_SUCCESS); 1565 } 1566 1567 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1568 { 1569 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1570 Mat a = baij->A, b = baij->B; 1571 PetscInt s1, s2, s3; 1572 1573 PetscFunctionBegin; 1574 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1575 if (rr) { 1576 PetscCall(VecGetLocalSize(rr, &s1)); 1577 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1578 /* Overlap communication with computation. 
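     The forward scatter of rr into baij->lvec is only started here; the matching VecScatterEnd() is not posted until
     after the diagonal block has been scaled, so the ghost-value communication overlaps the local computation.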
*/ 1579 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1580 } 1581 if (ll) { 1582 PetscCall(VecGetLocalSize(ll, &s1)); 1583 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1584 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1585 } 1586 /* scale the diagonal block */ 1587 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1588 1589 if (rr) { 1590 /* Do a scatter end and then right scale the off-diagonal block */ 1591 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1592 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1593 } 1594 PetscFunctionReturn(PETSC_SUCCESS); 1595 } 1596 1597 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1598 { 1599 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1600 PetscInt *lrows; 1601 PetscInt r, len; 1602 PetscBool cong; 1603 1604 PetscFunctionBegin; 1605 /* get locally owned rows */ 1606 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1607 /* fix right-hand side if needed */ 1608 if (x && b) { 1609 const PetscScalar *xx; 1610 PetscScalar *bb; 1611 1612 PetscCall(VecGetArrayRead(x, &xx)); 1613 PetscCall(VecGetArray(b, &bb)); 1614 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1615 PetscCall(VecRestoreArrayRead(x, &xx)); 1616 PetscCall(VecRestoreArray(b, &bb)); 1617 } 1618 1619 /* actually zap the local rows */ 1620 /* 1621 Zero the required rows. If the "diagonal block" of the matrix 1622 is square and the user wishes to set the diagonal we use separate 1623 code so that MatSetValues() is not called for each diagonal allocating 1624 new memory, thus calling lots of mallocs and slowing things down. 
1625 1626 */ 1627 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1628 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1629 PetscCall(MatHasCongruentLayouts(A, &cong)); 1630 if ((diag != 0.0) && cong) { 1631 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1632 } else if (diag != 0.0) { 1633 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1634 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1635 for (r = 0; r < len; ++r) { 1636 const PetscInt row = lrows[r] + A->rmap->rstart; 1637 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1638 } 1639 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1640 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1641 } else { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1643 } 1644 PetscCall(PetscFree(lrows)); 1645 1646 /* only change matrix nonzero state if pattern was allowed to be changed */ 1647 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1648 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1649 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1650 } 1651 PetscFunctionReturn(PETSC_SUCCESS); 1652 } 1653 1654 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1655 { 1656 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1657 PetscMPIInt n, p = 0; 1658 PetscInt i, j, k, r, len = 0, row, col, count; 1659 PetscInt *lrows, *owners = A->rmap->range; 1660 PetscSFNode *rrows; 1661 PetscSF sf; 1662 const PetscScalar *xx; 1663 PetscScalar *bb, *mask; 1664 Vec xmask, lmask; 1665 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1666 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1667 PetscScalar *aa; 1668 1669 PetscFunctionBegin; 1670 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1671 /* Create SF where leaves are input rows and roots are owned rows */ 1672 PetscCall(PetscMalloc1(n, &lrows)); 1673 for (r = 0; r < n; ++r) lrows[r] = -1; 1674 PetscCall(PetscMalloc1(N, &rrows)); 1675 for (r = 0; r < N; ++r) { 1676 const PetscInt idx = rows[r]; 1677 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1678 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1679 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1680 } 1681 rrows[r].rank = p; 1682 rrows[r].index = rows[r] - owners[p]; 1683 } 1684 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1685 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1686 /* Collect flags for rows to be zeroed */ 1687 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1688 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1689 PetscCall(PetscSFDestroy(&sf)); 1690 /* Compress and put in row numbers */ 1691 for (r = 0; r < n; ++r) 1692 if (lrows[r] >= 0) lrows[len++] = r; 1693 /* zero diagonal part of matrix */ 1694 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1695 /* handle off-diagonal part of matrix */ 1696 
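  /*
     Approach used below for the off-diagonal block: build a mask vector that is 1 in every locally zeroed row,
     scatter it into the ghost (lvec) layout so each process learns which of its ghost columns correspond to rows
     zeroed elsewhere, then zero the zeroed rows of the off-diagonal block directly and sweep its remaining entries,
     zeroing every column whose mask is set and, when x and b are supplied, subtracting the eliminated coefficient
     times the corresponding entry of x from b.
  */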
PetscCall(MatCreateVecs(A, &xmask, NULL)); 1697 PetscCall(VecDuplicate(l->lvec, &lmask)); 1698 PetscCall(VecGetArray(xmask, &bb)); 1699 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1700 PetscCall(VecRestoreArray(xmask, &bb)); 1701 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1702 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1703 PetscCall(VecDestroy(&xmask)); 1704 if (x) { 1705 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1706 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1707 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1708 PetscCall(VecGetArray(b, &bb)); 1709 } 1710 PetscCall(VecGetArray(lmask, &mask)); 1711 /* remove zeroed rows of off-diagonal matrix */ 1712 for (i = 0; i < len; ++i) { 1713 row = lrows[i]; 1714 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1715 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1716 for (k = 0; k < count; ++k) { 1717 aa[0] = 0.0; 1718 aa += bs; 1719 } 1720 } 1721 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1722 for (i = 0; i < l->B->rmap->N; ++i) { 1723 row = i / bs; 1724 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1725 for (k = 0; k < bs; ++k) { 1726 col = bs * baij->j[j] + k; 1727 if (PetscAbsScalar(mask[col])) { 1728 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1729 if (x) bb[i] -= aa[0] * xx[col]; 1730 aa[0] = 0.0; 1731 } 1732 } 1733 } 1734 } 1735 if (x) { 1736 PetscCall(VecRestoreArray(b, &bb)); 1737 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1738 } 1739 PetscCall(VecRestoreArray(lmask, &mask)); 1740 PetscCall(VecDestroy(&lmask)); 1741 PetscCall(PetscFree(lrows)); 1742 1743 /* only change matrix nonzero state if pattern was allowed to be changed */ 1744 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1745 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1746 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1747 } 1748 PetscFunctionReturn(PETSC_SUCCESS); 1749 } 1750 1751 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1752 { 1753 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1754 1755 PetscFunctionBegin; 1756 PetscCall(MatSetUnfactored(a->A)); 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1761 1762 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1763 { 1764 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1765 Mat a, b, c, d; 1766 PetscBool flg; 1767 1768 PetscFunctionBegin; 1769 a = matA->A; 1770 b = matA->B; 1771 c = matB->A; 1772 d = matB->B; 1773 1774 PetscCall(MatEqual(a, c, &flg)); 1775 if (flg) PetscCall(MatEqual(b, d, &flg)); 1776 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1777 PetscFunctionReturn(PETSC_SUCCESS); 1778 } 1779 1780 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1781 { 1782 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1783 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1784 1785 PetscFunctionBegin; 1786 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
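     In that case, or when the copy was not requested with SAME_NONZERO_PATTERN, fall back to the generic
     MatCopy_Basic(), which transfers the values row by row through MatGetRow()/MatSetValues(); otherwise the
     diagonal and off-diagonal blocks are copied directly.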
*/ 1787 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1788 PetscCall(MatCopy_Basic(A, B, str)); 1789 } else { 1790 PetscCall(MatCopy(a->A, b->A, str)); 1791 PetscCall(MatCopy(a->B, b->B, str)); 1792 } 1793 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1798 { 1799 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1800 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1801 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1802 1803 PetscFunctionBegin; 1804 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1809 { 1810 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1811 PetscBLASInt bnz, one = 1; 1812 Mat_SeqBAIJ *x, *y; 1813 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1814 1815 PetscFunctionBegin; 1816 if (str == SAME_NONZERO_PATTERN) { 1817 PetscScalar alpha = a; 1818 x = (Mat_SeqBAIJ *)xx->A->data; 1819 y = (Mat_SeqBAIJ *)yy->A->data; 1820 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1821 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1822 x = (Mat_SeqBAIJ *)xx->B->data; 1823 y = (Mat_SeqBAIJ *)yy->B->data; 1824 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1825 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1826 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1827 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1828 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1829 } else { 1830 Mat B; 1831 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1832 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1833 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1834 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1835 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1836 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1837 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1838 PetscCall(MatSetType(B, MATMPIBAIJ)); 1839 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1840 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1841 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1842 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1843 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1844 PetscCall(MatHeaderMerge(Y, &B)); 1845 PetscCall(PetscFree(nnz_d)); 1846 PetscCall(PetscFree(nnz_o)); 1847 } 1848 PetscFunctionReturn(PETSC_SUCCESS); 1849 } 1850 1851 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1852 { 1853 PetscFunctionBegin; 1854 if (PetscDefined(USE_COMPLEX)) { 1855 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1856 1857 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1858 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1859 } 1860 PetscFunctionReturn(PETSC_SUCCESS); 1861 } 1862 1863 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1864 { 1865 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1866 1867 PetscFunctionBegin; 1868 PetscCall(MatRealPart(a->A)); 1869 PetscCall(MatRealPart(a->B)); 1870 PetscFunctionReturn(PETSC_SUCCESS); 1871 } 1872 1873 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1874 { 1875 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1876 1877 PetscFunctionBegin; 1878 PetscCall(MatImaginaryPart(a->A)); 1879 PetscCall(MatImaginaryPart(a->B)); 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1884 { 1885 IS iscol_local; 1886 PetscInt csize; 1887 1888 PetscFunctionBegin; 1889 PetscCall(ISGetLocalSize(iscol, &csize)); 1890 if (call == MAT_REUSE_MATRIX) { 1891 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1892 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1893 } else { 1894 PetscCall(ISAllGather(iscol, &iscol_local)); 1895 } 1896 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1897 if (call == MAT_INITIAL_MATRIX) { 1898 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1899 PetscCall(ISDestroy(&iscol_local)); 1900 } 1901 PetscFunctionReturn(PETSC_SUCCESS); 1902 } 1903 1904 /* 1905 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1906 in local and then by concatenating the local matrices the end result. 1907 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1908 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1909 */ 1910 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1911 { 1912 PetscMPIInt rank, size; 1913 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1914 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1915 Mat M, Mreuse; 1916 MatScalar *vwork, *aa; 1917 MPI_Comm comm; 1918 IS isrow_new, iscol_new; 1919 Mat_SeqBAIJ *aij; 1920 1921 PetscFunctionBegin; 1922 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1923 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1924 PetscCallMPI(MPI_Comm_size(comm, &size)); 1925 /* The compression and expansion should be avoided. 
Doesn't point 1926 out errors, might change the indices, hence buggey */ 1927 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1928 if (isrow == iscol) { 1929 iscol_new = isrow_new; 1930 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1931 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1932 1933 if (call == MAT_REUSE_MATRIX) { 1934 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1935 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1936 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1937 } else { 1938 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1939 } 1940 PetscCall(ISDestroy(&isrow_new)); 1941 PetscCall(ISDestroy(&iscol_new)); 1942 /* 1943 m - number of local rows 1944 n - number of columns (same on all processors) 1945 rstart - first row in new global matrix generated 1946 */ 1947 PetscCall(MatGetBlockSize(mat, &bs)); 1948 PetscCall(MatGetSize(Mreuse, &m, &n)); 1949 m = m / bs; 1950 n = n / bs; 1951 1952 if (call == MAT_INITIAL_MATRIX) { 1953 aij = (Mat_SeqBAIJ *)Mreuse->data; 1954 ii = aij->i; 1955 jj = aij->j; 1956 1957 /* 1958 Determine the number of non-zeros in the diagonal and off-diagonal 1959 portions of the matrix in order to do correct preallocation 1960 */ 1961 1962 /* first get start and end of "diagonal" columns */ 1963 if (csize == PETSC_DECIDE) { 1964 PetscCall(ISGetSize(isrow, &mglobal)); 1965 if (mglobal == n * bs) { /* square matrix */ 1966 nlocal = m; 1967 } else { 1968 nlocal = n / size + ((n % size) > rank); 1969 } 1970 } else { 1971 nlocal = csize / bs; 1972 } 1973 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1974 rstart = rend - nlocal; 1975 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1976 1977 /* next, compute all the lengths */ 1978 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1979 for (i = 0; i < m; i++) { 1980 jend = ii[i + 1] - ii[i]; 1981 olen = 0; 1982 dlen = 0; 1983 for (j = 0; j < jend; j++) { 1984 if (*jj < rstart || *jj >= rend) olen++; 1985 else dlen++; 1986 jj++; 1987 } 1988 olens[i] = olen; 1989 dlens[i] = dlen; 1990 } 1991 PetscCall(MatCreate(comm, &M)); 1992 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1993 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 1994 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1995 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1996 PetscCall(PetscFree2(dlens, olens)); 1997 } else { 1998 PetscInt ml, nl; 1999 2000 M = *newmat; 2001 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2002 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2003 PetscCall(MatZeroEntries(M)); 2004 /* 2005 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2006 rather than the slower MatSetValues(). 
2007 */ 2008 M->was_assembled = PETSC_TRUE; 2009 M->assembled = PETSC_FALSE; 2010 } 2011 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2012 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2013 aij = (Mat_SeqBAIJ *)Mreuse->data; 2014 ii = aij->i; 2015 jj = aij->j; 2016 aa = aij->a; 2017 for (i = 0; i < m; i++) { 2018 row = rstart / bs + i; 2019 nz = ii[i + 1] - ii[i]; 2020 cwork = jj; 2021 jj = PetscSafePointerPlusOffset(jj, nz); 2022 vwork = aa; 2023 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2024 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2025 } 2026 2027 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2028 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2029 *newmat = M; 2030 2031 /* save submatrix used in processor for next request */ 2032 if (call == MAT_INITIAL_MATRIX) { 2033 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2034 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2035 } 2036 PetscFunctionReturn(PETSC_SUCCESS); 2037 } 2038 2039 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2040 { 2041 MPI_Comm comm, pcomm; 2042 PetscInt clocal_size, nrows; 2043 const PetscInt *rows; 2044 PetscMPIInt size; 2045 IS crowp, lcolp; 2046 2047 PetscFunctionBegin; 2048 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2049 /* make a collective version of 'rowp' */ 2050 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2051 if (pcomm == comm) { 2052 crowp = rowp; 2053 } else { 2054 PetscCall(ISGetSize(rowp, &nrows)); 2055 PetscCall(ISGetIndices(rowp, &rows)); 2056 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2057 PetscCall(ISRestoreIndices(rowp, &rows)); 2058 } 2059 PetscCall(ISSetPermutation(crowp)); 2060 /* make a local version of 'colp' */ 2061 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2062 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2063 if (size == 1) { 2064 lcolp = colp; 2065 } else { 2066 PetscCall(ISAllGather(colp, &lcolp)); 2067 } 2068 PetscCall(ISSetPermutation(lcolp)); 2069 /* now we just get the submatrix */ 2070 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2071 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2072 /* clean up */ 2073 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2074 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2075 PetscFunctionReturn(PETSC_SUCCESS); 2076 } 2077 2078 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2079 { 2080 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2081 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2082 2083 PetscFunctionBegin; 2084 if (nghosts) *nghosts = B->nbs; 2085 if (ghosts) *ghosts = baij->garray; 2086 PetscFunctionReturn(PETSC_SUCCESS); 2087 } 2088 2089 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2090 { 2091 Mat B; 2092 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2093 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2094 Mat_SeqAIJ *b; 2095 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2096 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2097 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2098 2099 PetscFunctionBegin; 2100 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2101 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2102 2103 /* Tell every 
processor the number of nonzeros per row */ 2104 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2105 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2106 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2107 displs = recvcounts + size; 2108 for (i = 0; i < size; i++) { 2109 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2110 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2111 } 2112 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2113 /* Create the sequential matrix of the same type as the local block diagonal */ 2114 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2115 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2116 PetscCall(MatSetType(B, MATSEQAIJ)); 2117 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2118 b = (Mat_SeqAIJ *)B->data; 2119 2120 /* Copy my part of matrix column indices over */ 2121 sendcount = ad->nz + bd->nz; 2122 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2123 a_jsendbuf = ad->j; 2124 b_jsendbuf = bd->j; 2125 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2126 cnt = 0; 2127 for (i = 0; i < n; i++) { 2128 /* put in lower diagonal portion */ 2129 m = bd->i[i + 1] - bd->i[i]; 2130 while (m > 0) { 2131 /* is it above diagonal (in bd (compressed) numbering) */ 2132 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2133 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2134 m--; 2135 } 2136 2137 /* put in diagonal portion */ 2138 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2139 2140 /* put in upper diagonal portion */ 2141 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2142 } 2143 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2144 2145 /* Gather all column indices to all processors */ 2146 for (i = 0; i < size; i++) { 2147 recvcounts[i] = 0; 2148 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2149 } 2150 displs[0] = 0; 2151 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2152 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2153 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2154 /* set the b->ilen (length of each row) values */ 2155 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2156 /* set the b->i indices */ 2157 b->i[0] = 0; 2158 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2159 PetscCall(PetscFree(lens)); 2160 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2161 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2162 PetscCall(PetscFree(recvcounts)); 2163 2164 PetscCall(MatPropagateSymmetryOptions(A, B)); 2165 *newmat = B; 2166 PetscFunctionReturn(PETSC_SUCCESS); 2167 } 2168 2169 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2170 { 2171 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2172 Vec bb1 = NULL; 2173 2174 PetscFunctionBegin; 2175 if (flag == SOR_APPLY_UPPER) { 2176 
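    /* SOR_APPLY_UPPER is handed entirely to the local diagonal block; the off-diagonal block takes no part in it, so no ghost-value communication is needed */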
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2181 2182 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2183 if (flag & SOR_ZERO_INITIAL_GUESS) { 2184 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2185 its--; 2186 } 2187 2188 while (its--) { 2189 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2190 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2191 2192 /* update rhs: bb1 = bb - B*x */ 2193 PetscCall(VecScale(mat->lvec, -1.0)); 2194 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2195 2196 /* local sweep */ 2197 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2198 } 2199 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2200 if (flag & SOR_ZERO_INITIAL_GUESS) { 2201 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2202 its--; 2203 } 2204 while (its--) { 2205 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2206 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2207 2208 /* update rhs: bb1 = bb - B*x */ 2209 PetscCall(VecScale(mat->lvec, -1.0)); 2210 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2211 2212 /* local sweep */ 2213 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2214 } 2215 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2216 if (flag & SOR_ZERO_INITIAL_GUESS) { 2217 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2218 its--; 2219 } 2220 while (its--) { 2221 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2222 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2223 2224 /* update rhs: bb1 = bb - B*x */ 2225 PetscCall(VecScale(mat->lvec, -1.0)); 2226 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2227 2228 /* local sweep */ 2229 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2230 } 2231 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2232 2233 PetscCall(VecDestroy(&bb1)); 2234 PetscFunctionReturn(PETSC_SUCCESS); 2235 } 2236 2237 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2238 { 2239 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2240 PetscInt m, N, i, *garray = aij->garray; 2241 PetscInt ib, jb, bs = A->rmap->bs; 2242 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2243 MatScalar *a_val = a_aij->a; 2244 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2245 MatScalar *b_val = b_aij->a; 2246 PetscReal *work; 2247 2248 PetscFunctionBegin; 2249 PetscCall(MatGetSize(A, &m, &N)); 2250 PetscCall(PetscCalloc1(N, &work)); 2251 if (type == NORM_2) { 2252 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2253 for (jb = 0; jb < bs; jb++) { 2254 for (ib = 0; ib < bs; ib++) { 2255 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2256 a_val++; 2257 } 2258 } 2259 } 2260 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2261 for (jb = 0; jb < bs; jb++) { 2262 for (ib = 0; ib < bs; ib++) { 2263 work[garray[b_aij->j[i]] * bs + jb] += 
PetscAbsScalar(*b_val * *b_val); 2264 b_val++; 2265 } 2266 } 2267 } 2268 } else if (type == NORM_1) { 2269 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2270 for (jb = 0; jb < bs; jb++) { 2271 for (ib = 0; ib < bs; ib++) { 2272 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2273 a_val++; 2274 } 2275 } 2276 } 2277 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2278 for (jb = 0; jb < bs; jb++) { 2279 for (ib = 0; ib < bs; ib++) { 2280 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2281 b_val++; 2282 } 2283 } 2284 } 2285 } else if (type == NORM_INFINITY) { 2286 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2287 for (jb = 0; jb < bs; jb++) { 2288 for (ib = 0; ib < bs; ib++) { 2289 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2290 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2291 a_val++; 2292 } 2293 } 2294 } 2295 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2296 for (jb = 0; jb < bs; jb++) { 2297 for (ib = 0; ib < bs; ib++) { 2298 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2299 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2300 b_val++; 2301 } 2302 } 2303 } 2304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2305 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2306 for (jb = 0; jb < bs; jb++) { 2307 for (ib = 0; ib < bs; ib++) { 2308 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2309 a_val++; 2310 } 2311 } 2312 } 2313 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2314 for (jb = 0; jb < bs; jb++) { 2315 for (ib = 0; ib < bs; ib++) { 2316 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2317 b_val++; 2318 } 2319 } 2320 } 2321 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2322 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2323 for (jb = 0; jb < bs; jb++) { 2324 for (ib = 0; ib < bs; ib++) { 2325 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2326 a_val++; 2327 } 2328 } 2329 } 2330 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2331 for (jb = 0; jb < bs; jb++) { 2332 for (ib = 0; ib < bs; ib++) { 2333 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2334 b_val++; 2335 } 2336 } 2337 } 2338 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2339 if (type == NORM_INFINITY) { 2340 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2341 } else { 2342 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2343 } 2344 PetscCall(PetscFree(work)); 2345 if (type == NORM_2) { 2346 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2347 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2348 for (i = 0; i < N; i++) reductions[i] /= m; 2349 } 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } 2352 2353 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2354 { 2355 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2356 2357 PetscFunctionBegin; 2358 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2359 A->factorerrortype = a->A->factorerrortype; 2360 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2361 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2362 
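  /*
     The block diagonal of an MPIBAIJ matrix lies entirely in the local diagonal block a->A, so the inversion above is
     purely local. The factor-error fields are copied up so that a caller querying the parallel matrix (for example a
     point-block Jacobi preconditioner) sees any zero-pivot block detected in a->A.
  */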
PetscFunctionReturn(PETSC_SUCCESS); 2363 } 2364 2365 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2366 { 2367 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2368 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2369 2370 PetscFunctionBegin; 2371 if (!Y->preallocated) { 2372 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2373 } else if (!aij->nz) { 2374 PetscInt nonew = aij->nonew; 2375 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2376 aij->nonew = nonew; 2377 } 2378 PetscCall(MatShift_Basic(Y, a)); 2379 PetscFunctionReturn(PETSC_SUCCESS); 2380 } 2381 2382 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2383 { 2384 PetscFunctionBegin; 2385 *a = ((Mat_MPIBAIJ *)A->data)->A; 2386 PetscFunctionReturn(PETSC_SUCCESS); 2387 } 2388 2389 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2390 { 2391 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2392 2393 PetscFunctionBegin; 2394 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2395 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2396 PetscFunctionReturn(PETSC_SUCCESS); 2397 } 2398 2399 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2400 MatGetRow_MPIBAIJ, 2401 MatRestoreRow_MPIBAIJ, 2402 MatMult_MPIBAIJ, 2403 /* 4*/ MatMultAdd_MPIBAIJ, 2404 MatMultTranspose_MPIBAIJ, 2405 MatMultTransposeAdd_MPIBAIJ, 2406 NULL, 2407 NULL, 2408 NULL, 2409 /*10*/ NULL, 2410 NULL, 2411 NULL, 2412 MatSOR_MPIBAIJ, 2413 MatTranspose_MPIBAIJ, 2414 /*15*/ MatGetInfo_MPIBAIJ, 2415 MatEqual_MPIBAIJ, 2416 MatGetDiagonal_MPIBAIJ, 2417 MatDiagonalScale_MPIBAIJ, 2418 MatNorm_MPIBAIJ, 2419 /*20*/ MatAssemblyBegin_MPIBAIJ, 2420 MatAssemblyEnd_MPIBAIJ, 2421 MatSetOption_MPIBAIJ, 2422 MatZeroEntries_MPIBAIJ, 2423 /*24*/ MatZeroRows_MPIBAIJ, 2424 NULL, 2425 NULL, 2426 NULL, 2427 NULL, 2428 /*29*/ MatSetUp_MPI_Hash, 2429 NULL, 2430 NULL, 2431 MatGetDiagonalBlock_MPIBAIJ, 2432 NULL, 2433 /*34*/ MatDuplicate_MPIBAIJ, 2434 NULL, 2435 NULL, 2436 NULL, 2437 NULL, 2438 /*39*/ MatAXPY_MPIBAIJ, 2439 MatCreateSubMatrices_MPIBAIJ, 2440 MatIncreaseOverlap_MPIBAIJ, 2441 MatGetValues_MPIBAIJ, 2442 MatCopy_MPIBAIJ, 2443 /*44*/ NULL, 2444 MatScale_MPIBAIJ, 2445 MatShift_MPIBAIJ, 2446 NULL, 2447 MatZeroRowsColumns_MPIBAIJ, 2448 /*49*/ NULL, 2449 NULL, 2450 NULL, 2451 NULL, 2452 NULL, 2453 /*54*/ MatFDColoringCreate_MPIXAIJ, 2454 NULL, 2455 MatSetUnfactored_MPIBAIJ, 2456 MatPermute_MPIBAIJ, 2457 MatSetValuesBlocked_MPIBAIJ, 2458 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2459 MatDestroy_MPIBAIJ, 2460 MatView_MPIBAIJ, 2461 NULL, 2462 NULL, 2463 /*64*/ NULL, 2464 NULL, 2465 NULL, 2466 NULL, 2467 MatGetRowMaxAbs_MPIBAIJ, 2468 /*69*/ NULL, 2469 NULL, 2470 NULL, 2471 MatFDColoringApply_BAIJ, 2472 NULL, 2473 /*74*/ NULL, 2474 NULL, 2475 NULL, 2476 NULL, 2477 MatLoad_MPIBAIJ, 2478 /*79*/ NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 NULL, 2483 /*84*/ NULL, 2484 NULL, 2485 NULL, 2486 NULL, 2487 NULL, 2488 /*89*/ NULL, 2489 NULL, 2490 NULL, 2491 NULL, 2492 MatConjugate_MPIBAIJ, 2493 /*94*/ NULL, 2494 NULL, 2495 MatRealPart_MPIBAIJ, 2496 MatImaginaryPart_MPIBAIJ, 2497 NULL, 2498 /*99*/ NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2504 NULL, 2505 MatGetGhosts_MPIBAIJ, 2506 NULL, 2507 NULL, 2508 /*109*/ NULL, 2509 NULL, 2510 NULL, 2511 NULL, 2512 MatGetMultiProcBlock_MPIBAIJ, 2513 /*114*/ NULL, 2514 MatGetColumnReductions_MPIBAIJ, 2515 MatInvertBlockDiagonal_MPIBAIJ, 
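                                       /* a NULL entry means the operation is not provided by MATMPIBAIJ; the corresponding MatXXX() call then either falls back to a generic implementation or errors out as unsupported */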
2516 NULL, 2517 NULL, 2518 /*119*/ NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 /*124*/ NULL, 2524 NULL, 2525 MatSetBlockSizes_Default, 2526 NULL, 2527 MatFDColoringSetUp_MPIXAIJ, 2528 /*129*/ NULL, 2529 MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2530 NULL, 2531 NULL, 2532 NULL, 2533 /*134*/ NULL, 2534 NULL, 2535 MatEliminateZeros_MPIBAIJ, 2536 MatGetRowSumAbs_MPIBAIJ, 2537 NULL, 2538 /*139*/ NULL, 2539 NULL, 2540 MatCopyHashToXAIJ_MPI_Hash, 2541 NULL, 2542 NULL}; 2543 2544 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2545 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2546 2547 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2548 { 2549 PetscInt m, rstart, cstart, cend; 2550 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2551 const PetscInt *JJ = NULL; 2552 PetscScalar *values = NULL; 2553 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2554 PetscBool nooffprocentries; 2555 2556 PetscFunctionBegin; 2557 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2558 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2559 PetscCall(PetscLayoutSetUp(B->rmap)); 2560 PetscCall(PetscLayoutSetUp(B->cmap)); 2561 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2562 m = B->rmap->n / bs; 2563 rstart = B->rmap->rstart / bs; 2564 cstart = B->cmap->rstart / bs; 2565 cend = B->cmap->rend / bs; 2566 2567 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2568 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2569 for (i = 0; i < m; i++) { 2570 nz = ii[i + 1] - ii[i]; 2571 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2572 nz_max = PetscMax(nz_max, nz); 2573 dlen = 0; 2574 olen = 0; 2575 JJ = jj + ii[i]; 2576 for (j = 0; j < nz; j++) { 2577 if (*JJ < cstart || *JJ >= cend) olen++; 2578 else dlen++; 2579 JJ++; 2580 } 2581 d_nnz[i] = dlen; 2582 o_nnz[i] = olen; 2583 } 2584 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2585 PetscCall(PetscFree2(d_nnz, o_nnz)); 2586 2587 values = (PetscScalar *)V; 2588 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2589 for (i = 0; i < m; i++) { 2590 PetscInt row = i + rstart; 2591 PetscInt ncols = ii[i + 1] - ii[i]; 2592 const PetscInt *icols = jj + ii[i]; 2593 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2594 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2595 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2596 } else { /* block ordering does not match so we can only insert one block at a time. */ 2597 PetscInt j; 2598 for (j = 0; j < ncols; j++) { 2599 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2600 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2601 } 2602 } 2603 } 2604 2605 if (!V) PetscCall(PetscFree(values)); 2606 nooffprocentries = B->nooffprocentries; 2607 B->nooffprocentries = PETSC_TRUE; 2608 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2609 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2610 B->nooffprocentries = nooffprocentries; 2611 2612 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2613 PetscFunctionReturn(PETSC_SUCCESS); 2614 } 2615 2616 /*@C 2617 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2618 2619 Collective 2620 2621 Input Parameters: 2622 + B - the matrix 2623 . bs - the block size 2624 . i - the indices into `j` for the start of each local row (starts with zero) 2625 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2626 - v - optional values in the matrix, use `NULL` if not provided 2627 2628 Level: advanced 2629 2630 Notes: 2631 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2632 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2633 called this routine. 2634 2635 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2636 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2637 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2638 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2639 block column and the second index is over columns within a block. 
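   As a small illustration (the sizes and arrays below are made up and assume a single MPI rank), a matrix with two
   block rows, block size 2, and nonzero blocks at block columns {0,1} in the first block row and {1} in the second
   could be set up with
.vb
   Mat      B;
   PetscInt bs = 2, i[] = {0, 2, 3}, j[] = {0, 1, 1};

   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, 4, 4, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, i, j, NULL));
.ve
   Passing `NULL` for `v` sets only the nonzero structure; the numerical values can then be filled in with `MatSetValuesBlocked()`.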
2640 2641 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2642 2643 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2644 @*/ 2645 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2646 { 2647 PetscFunctionBegin; 2648 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2649 PetscValidType(B, 1); 2650 PetscValidLogicalCollectiveInt(B, bs, 2); 2651 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2656 { 2657 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2658 PetscInt i; 2659 PetscMPIInt size; 2660 2661 PetscFunctionBegin; 2662 if (B->hash_active) { 2663 B->ops[0] = b->cops; 2664 B->hash_active = PETSC_FALSE; 2665 } 2666 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2667 PetscCall(MatSetBlockSize(B, bs)); 2668 PetscCall(PetscLayoutSetUp(B->rmap)); 2669 PetscCall(PetscLayoutSetUp(B->cmap)); 2670 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2671 2672 if (d_nnz) { 2673 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2674 } 2675 if (o_nnz) { 2676 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2677 } 2678 2679 b->bs2 = bs * bs; 2680 b->mbs = B->rmap->n / bs; 2681 b->nbs = B->cmap->n / bs; 2682 b->Mbs = B->rmap->N / bs; 2683 b->Nbs = B->cmap->N / bs; 2684 2685 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2686 b->rstartbs = B->rmap->rstart / bs; 2687 b->rendbs = B->rmap->rend / bs; 2688 b->cstartbs = B->cmap->rstart / bs; 2689 b->cendbs = B->cmap->rend / bs; 2690 2691 #if defined(PETSC_USE_CTABLE) 2692 PetscCall(PetscHMapIDestroy(&b->colmap)); 2693 #else 2694 PetscCall(PetscFree(b->colmap)); 2695 #endif 2696 PetscCall(PetscFree(b->garray)); 2697 PetscCall(VecDestroy(&b->lvec)); 2698 PetscCall(VecScatterDestroy(&b->Mvctx)); 2699 2700 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2701 2702 MatSeqXAIJGetOptions_Private(b->B); 2703 PetscCall(MatDestroy(&b->B)); 2704 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2705 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2706 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2707 MatSeqXAIJRestoreOptions_Private(b->B); 2708 2709 MatSeqXAIJGetOptions_Private(b->A); 2710 PetscCall(MatDestroy(&b->A)); 2711 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2712 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2713 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2714 MatSeqXAIJRestoreOptions_Private(b->A); 2715 2716 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2717 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2718 B->preallocated = PETSC_TRUE; 2719 B->was_assembled = PETSC_FALSE; 2720 B->assembled = PETSC_FALSE; 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2725 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2726 2727 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2728 { 2729 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2730 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2731 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2732 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2733 2734 PetscFunctionBegin; 2735 PetscCall(PetscMalloc1(M + 1, &ii)); 2736 ii[0] = 0; 2737 for (i = 0; i < M; i++) { 2738 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2739 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2740 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2741 /* remove one from count of matrix has diagonal */ 2742 for (j = id[i]; j < id[i + 1]; j++) { 2743 if (jd[j] == i) { 2744 ii[i + 1]--; 2745 break; 2746 } 2747 } 2748 } 2749 PetscCall(PetscMalloc1(ii[M], &jj)); 2750 cnt = 0; 2751 for (i = 0; i < M; i++) { 2752 for (j = io[i]; j < io[i + 1]; j++) { 2753 if (garray[jo[j]] > rstart) break; 2754 jj[cnt++] = garray[jo[j]]; 2755 } 2756 for (k = id[i]; k < id[i + 1]; k++) { 2757 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2758 } 2759 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2760 } 2761 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2766 2767 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2768 2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2770 { 2771 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2772 Mat_MPIAIJ *b; 2773 Mat B; 2774 2775 PetscFunctionBegin; 2776 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2777 2778 if (reuse == MAT_REUSE_MATRIX) { 2779 B = *newmat; 2780 } else { 2781 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2782 PetscCall(MatSetType(B, MATMPIAIJ)); 2783 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2784 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2785 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2786 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2787 } 2788 b = (Mat_MPIAIJ *)B->data; 2789 2790 if (reuse == MAT_REUSE_MATRIX) { 2791 
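    /* reuse path: the existing sequential halves of the MPIAIJ matrix are overwritten in place, so *newmat must come from a previous
       conversion of a matrix with the same nonzero pattern. A typical caller reaches this routine through the composed method, e.g.
       MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &Aaij) the first time and MAT_REUSE_MATRIX on later calls, where Aaij is whatever
       name the caller uses for the result. */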
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2792 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2793 } else { 2794 PetscInt *garray = a->garray; 2795 Mat_SeqAIJ *bB; 2796 PetscInt bs, nnz; 2797 PetscCall(MatDestroy(&b->A)); 2798 PetscCall(MatDestroy(&b->B)); 2799 /* just clear out the data structure */ 2800 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2801 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2802 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2803 2804 /* Global numbering for b->B columns */ 2805 bB = (Mat_SeqAIJ *)b->B->data; 2806 bs = A->rmap->bs; 2807 nnz = bB->i[A->rmap->n]; 2808 for (PetscInt k = 0; k < nnz; k++) { 2809 PetscInt bj = bB->j[k] / bs; 2810 PetscInt br = bB->j[k] % bs; 2811 bB->j[k] = garray[bj] * bs + br; 2812 } 2813 } 2814 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2815 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2816 2817 if (reuse == MAT_INPLACE_MATRIX) { 2818 PetscCall(MatHeaderReplace(A, &B)); 2819 } else { 2820 *newmat = B; 2821 } 2822 PetscFunctionReturn(PETSC_SUCCESS); 2823 } 2824 2825 /*MC 2826 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2827 2828 Options Database Keys: 2829 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2830 . -mat_block_size <bs> - set the blocksize used to store the matrix 2831 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2832 - -mat_use_hash_table <fact> - set hash table factor 2833 2834 Level: beginner 2835 2836 Note: 2837 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this no 2838 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2839 2840 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2841 M*/ 2842 2843 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2844 2845 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2846 { 2847 Mat_MPIBAIJ *b; 2848 PetscBool flg = PETSC_FALSE; 2849 2850 PetscFunctionBegin; 2851 PetscCall(PetscNew(&b)); 2852 B->data = (void *)b; 2853 B->ops[0] = MatOps_Values; 2854 B->assembled = PETSC_FALSE; 2855 2856 B->insertmode = NOT_SET_VALUES; 2857 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2858 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2859 2860 /* build local table of row and column ownerships */ 2861 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2862 2863 /* build cache for off array entries formed */ 2864 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2865 2866 b->donotstash = PETSC_FALSE; 2867 b->colmap = NULL; 2868 b->garray = NULL; 2869 b->roworiented = PETSC_TRUE; 2870 2871 /* stuff used in block assembly */ 2872 b->barray = NULL; 2873 2874 /* stuff used for matrix vector multiply */ 2875 b->lvec = NULL; 2876 b->Mvctx = NULL; 2877 2878 /* stuff for MatGetRow() */ 2879 b->rowindices = NULL; 2880 b->rowvalues = NULL; 2881 b->getrowactive = PETSC_FALSE; 2882 2883 /* hash table stuff */ 2884 b->ht = NULL; 2885 b->hd = NULL; 2886 b->ht_size = 0; 2887 b->ht_flag = PETSC_FALSE; 2888 b->ht_fact = 0; 2889 b->ht_total_ct = 0; 2890 b->ht_insert_ct = 0; 2891 2892 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2893 b->ijonly = PETSC_FALSE; 2894 2895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2898 #if defined(PETSC_HAVE_HYPRE) 2899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2900 #endif 2901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2908 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2909 2910 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2911 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2912 if (flg) { 2913 PetscReal fact = 1.39; 2914 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2915 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use 
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2916 if (fact <= 1.0) fact = 1.39; 2917 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2918 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2919 } 2920 PetscOptionsEnd(); 2921 PetscFunctionReturn(PETSC_SUCCESS); 2922 } 2923 2924 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2925 /*MC 2926 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2927 2928 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2929 and `MATMPIBAIJ` otherwise. 2930 2931 Options Database Keys: 2932 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2933 2934 Level: beginner 2935 2936 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2937 M*/ 2938 2939 /*@ 2940 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2941 (block compressed row). 2942 2943 Collective 2944 2945 Input Parameters: 2946 + B - the matrix 2947 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2948 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2949 . d_nz - number of block nonzeros per block row in diagonal portion of local 2950 submatrix (same for all local rows) 2951 . d_nnz - array containing the number of block nonzeros in the various block rows 2952 of the in diagonal portion of the local (possibly different for each block 2953 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2954 set it even if it is zero. 2955 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2956 submatrix (same for all local rows). 2957 - o_nnz - array containing the number of nonzeros in the various block rows of the 2958 off-diagonal portion of the local submatrix (possibly different for 2959 each block row) or `NULL`. 2960 2961 If the *_nnz parameter is given then the *_nz parameter is ignored 2962 2963 Options Database Keys: 2964 + -mat_block_size - size of the blocks to use 2965 - -mat_use_hash_table <fact> - set hash table factor 2966 2967 Level: intermediate 2968 2969 Notes: 2970 For good matrix assembly performance 2971 the user should preallocate the matrix storage by setting the parameters 2972 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 2973 performance can be increased by more than a factor of 50. 2974 2975 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2976 than it must be used on all processors that share the object for that argument. 2977 2978 Storage Information: 2979 For a square global matrix we define each processor's diagonal portion 2980 to be its local rows and the corresponding columns (a square submatrix); 2981 each processor's off-diagonal portion encompasses the remainder of the 2982 local matrix (a rectangular submatrix). 2983 2984 The user can specify preallocated storage for the diagonal part of 2985 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2986 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 2987 memory allocation. 
Likewise, specify preallocated storage for the 2988 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 2989 2990 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2991 the figure below we depict these three local rows and all columns (0-11). 2992 2993 .vb 2994 0 1 2 3 4 5 6 7 8 9 10 11 2995 -------------------------- 2996 row 3 |o o o d d d o o o o o o 2997 row 4 |o o o d d d o o o o o o 2998 row 5 |o o o d d d o o o o o o 2999 -------------------------- 3000 .ve 3001 3002 Thus, any entries in the d locations are stored in the d (diagonal) 3003 submatrix, and any entries in the o locations are stored in the 3004 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3005 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3006 3007 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3008 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3009 In general, for PDE problems in which most nonzeros are near the diagonal, 3010 one expects `d_nz` >> `o_nz`. 3011 3012 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3013 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3014 You can also run with the option `-info` and look for messages with the string 3015 malloc in them to see if additional memory allocation was needed. 3016 3017 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3018 @*/ 3019 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3020 { 3021 PetscFunctionBegin; 3022 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3023 PetscValidType(B, 1); 3024 PetscValidLogicalCollectiveInt(B, bs, 2); 3025 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3026 PetscFunctionReturn(PETSC_SUCCESS); 3027 } 3028 3029 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3030 /*@ 3031 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3032 (block compressed row). 3033 3034 Collective 3035 3036 Input Parameters: 3037 + comm - MPI communicator 3038 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3039 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3040 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3041 This value should be the same as the local size used in creating the 3042 y vector for the matrix-vector product y = Ax. 3043 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3044 This value should be the same as the local size used in creating the 3045 x vector for the matrix-vector product y = Ax. 3046 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3047 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3048 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3049 submatrix (same for all local rows) 3050 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3051 in the diagonal portion of the local submatrix (possibly different for each block 3052 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3053 and set it even if it is zero. 3054 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3055 submatrix (same for all local rows). 3056 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3057 off-diagonal portion of the local submatrix (possibly different for 3058 each block row) or NULL. 3059 3060 Output Parameter: 3061 . A - the matrix 3062 3063 Options Database Keys: 3064 + -mat_block_size - size of the blocks to use 3065 - -mat_use_hash_table <fact> - set hash table factor 3066 3067 Level: intermediate 3068 3069 Notes: 3070 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3071 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3072 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3073 3074 For good matrix assembly performance 3075 the user should preallocate the matrix storage by setting the parameters 3076 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3077 performance can be increased by more than a factor of 50. 3078 3079 If the *_nnz parameter is given then the *_nz parameter is ignored. 3080 3081 A nonzero block is any block that has 1 or more nonzeros in it. 3082 3083 The user MUST specify either the local or global matrix dimensions 3084 (possibly both). 3085 3086 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3087 then it must be used on all processors that share the object for that argument. 3088 3089 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3090 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3091 3092 Storage Information: 3093 For a square global matrix we define each processor's diagonal portion 3094 to be its local rows and the corresponding columns (a square submatrix); 3095 each processor's off-diagonal portion encompasses the remainder of the 3096 local matrix (a rectangular submatrix). 3097 3098 The user can specify preallocated storage for the diagonal part of 3099 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3100 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3101 memory allocation. Likewise, specify preallocated storage for the 3102 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3103 3104 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3105 the figure below we depict these three local rows and all columns (0-11). 3106 3107 .vb 3108 0 1 2 3 4 5 6 7 8 9 10 11 3109 -------------------------- 3110 row 3 |o o o d d d o o o o o o 3111 row 4 |o o o d d d o o o o o o 3112 row 5 |o o o d d d o o o o o o 3113 -------------------------- 3114 .ve 3115 3116 Thus, any entries in the d locations are stored in the d (diagonal) 3117 submatrix, and any entries in the o locations are stored in the 3118 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3119 stored simply in the `MATSEQBAIJ` format for compressed row storage.
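   As a concrete illustration, the rank depicted above could preallocate its three local rows as in the following sketch; the block size 1, the 12 x 12 global size, local sizes m = n = 3, and the assumption that every location drawn is nonzero are taken from the figure for this example only.
.vb
   Mat      A;
   PetscInt d_nnz[] = {3, 3, 3}; /* per row, 3 entries lie in the diagonal columns 3-5  */
   PetscInt o_nnz[] = {9, 9, 9}; /* per row, 9 entries lie in the remaining columns     */

   PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, 1, 3, 3, 12, 12, 0, d_nnz, 0, o_nnz, &A));
.ve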
3120 3121 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3122 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3123 In general, for PDE problems in which most nonzeros are near the diagonal, 3124 one expects `d_nz` >> `o_nz`. 3125 3126 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3127 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3128 @*/ 3129 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3130 { 3131 PetscMPIInt size; 3132 3133 PetscFunctionBegin; 3134 PetscCall(MatCreate(comm, A)); 3135 PetscCall(MatSetSizes(*A, m, n, M, N)); 3136 PetscCallMPI(MPI_Comm_size(comm, &size)); 3137 if (size > 1) { 3138 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3139 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3140 } else { 3141 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3142 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3143 } 3144 PetscFunctionReturn(PETSC_SUCCESS); 3145 } 3146 3147 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3148 { 3149 Mat mat; 3150 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3151 PetscInt len = 0; 3152 3153 PetscFunctionBegin; 3154 *newmat = NULL; 3155 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3156 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3157 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3158 3159 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3160 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3161 if (matin->hash_active) { 3162 PetscCall(MatSetUp(mat)); 3163 } else { 3164 mat->factortype = matin->factortype; 3165 mat->preallocated = PETSC_TRUE; 3166 mat->assembled = PETSC_TRUE; 3167 mat->insertmode = NOT_SET_VALUES; 3168 3169 a = (Mat_MPIBAIJ *)mat->data; 3170 mat->rmap->bs = matin->rmap->bs; 3171 a->bs2 = oldmat->bs2; 3172 a->mbs = oldmat->mbs; 3173 a->nbs = oldmat->nbs; 3174 a->Mbs = oldmat->Mbs; 3175 a->Nbs = oldmat->Nbs; 3176 3177 a->size = oldmat->size; 3178 a->rank = oldmat->rank; 3179 a->donotstash = oldmat->donotstash; 3180 a->roworiented = oldmat->roworiented; 3181 a->rowindices = NULL; 3182 a->rowvalues = NULL; 3183 a->getrowactive = PETSC_FALSE; 3184 a->barray = NULL; 3185 a->rstartbs = oldmat->rstartbs; 3186 a->rendbs = oldmat->rendbs; 3187 a->cstartbs = oldmat->cstartbs; 3188 a->cendbs = oldmat->cendbs; 3189 3190 /* hash table stuff */ 3191 a->ht = NULL; 3192 a->hd = NULL; 3193 a->ht_size = 0; 3194 a->ht_flag = oldmat->ht_flag; 3195 a->ht_fact = oldmat->ht_fact; 3196 a->ht_total_ct = 0; 3197 a->ht_insert_ct = 0; 3198 3199 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3200 if (oldmat->colmap) { 3201 #if defined(PETSC_USE_CTABLE) 3202 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3203 #else 3204 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3205 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3206 #endif 3207 } else a->colmap = NULL; 3208 3209 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3210 PetscCall(PetscMalloc1(len, &a->garray)); 3211 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3212 } else 
a->garray = NULL; 3213 3214 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3215 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3216 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3217 3218 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3219 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3220 } 3221 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3222 *newmat = mat; 3223 PetscFunctionReturn(PETSC_SUCCESS); 3224 } 3225 3226 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3227 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3228 { 3229 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3230 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3231 PetscScalar *matvals; 3232 3233 PetscFunctionBegin; 3234 PetscCall(PetscViewerSetUp(viewer)); 3235 3236 /* read in matrix header */ 3237 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3238 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3239 M = header[1]; 3240 N = header[2]; 3241 nz = header[3]; 3242 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3243 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3244 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3245 3246 /* set block sizes from the viewer's .info file */ 3247 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3248 /* set local sizes if not set already */ 3249 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3250 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3251 /* set global sizes if not set already */ 3252 if (mat->rmap->N < 0) mat->rmap->N = M; 3253 if (mat->cmap->N < 0) mat->cmap->N = N; 3254 PetscCall(PetscLayoutSetUp(mat->rmap)); 3255 PetscCall(PetscLayoutSetUp(mat->cmap)); 3256 3257 /* check if the matrix sizes are correct */ 3258 PetscCall(MatGetSize(mat, &rows, &cols)); 3259 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3260 PetscCall(MatGetBlockSize(mat, &bs)); 3261 PetscCall(MatGetLocalSize(mat, &m, &n)); 3262 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3263 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3264 mbs = m / bs; 3265 nbs = n / bs; 3266 3267 /* read in row lengths and build row indices */ 3268 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3269 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3270 rowidxs[0] = 0; 3271 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3272 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3273 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3274 3275 /* read in column indices and matrix values */ 3276 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3277 PetscCall(PetscViewerBinaryReadAll(viewer, 
colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3278 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3279 3280 { /* preallocate matrix storage */ 3281 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3282 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3283 PetscBool sbaij, done; 3284 PetscInt *d_nnz, *o_nnz; 3285 3286 PetscCall(PetscBTCreate(nbs, &bt)); 3287 PetscCall(PetscHSetICreate(&ht)); 3288 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3289 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3290 for (i = 0; i < mbs; i++) { 3291 PetscCall(PetscBTMemzero(nbs, bt)); 3292 PetscCall(PetscHSetIClear(ht)); 3293 for (k = 0; k < bs; k++) { 3294 PetscInt row = bs * i + k; 3295 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3296 PetscInt col = colidxs[j]; 3297 if (!sbaij || col >= row) { 3298 if (col >= cs && col < ce) { 3299 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3300 } else { 3301 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3302 if (done) o_nnz[i]++; 3303 } 3304 } 3305 } 3306 } 3307 } 3308 PetscCall(PetscBTDestroy(&bt)); 3309 PetscCall(PetscHSetIDestroy(&ht)); 3310 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3311 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3312 PetscCall(PetscFree2(d_nnz, o_nnz)); 3313 } 3314 3315 /* store matrix values */ 3316 for (i = 0; i < m; i++) { 3317 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3318 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3319 } 3320 3321 PetscCall(PetscFree(rowidxs)); 3322 PetscCall(PetscFree2(colidxs, matvals)); 3323 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3324 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3325 PetscFunctionReturn(PETSC_SUCCESS); 3326 } 3327 3328 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3329 { 3330 PetscBool isbinary; 3331 3332 PetscFunctionBegin; 3333 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3334 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3335 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3336 PetscFunctionReturn(PETSC_SUCCESS); 3337 } 3338 3339 /*@ 3340 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3341 3342 Input Parameters: 3343 + mat - the matrix 3344 - fact - factor 3345 3346 Options Database Key: 3347 . 
-mat_use_hash_table <fact> - provide the factor 3348 3349 Level: advanced 3350 3351 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3352 @*/ 3353 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3354 { 3355 PetscFunctionBegin; 3356 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3357 PetscFunctionReturn(PETSC_SUCCESS); 3358 } 3359 3360 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3361 { 3362 Mat_MPIBAIJ *baij; 3363 3364 PetscFunctionBegin; 3365 baij = (Mat_MPIBAIJ *)mat->data; 3366 baij->ht_fact = fact; 3367 PetscFunctionReturn(PETSC_SUCCESS); 3368 } 3369 3370 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3371 { 3372 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3373 PetscBool flg; 3374 3375 PetscFunctionBegin; 3376 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3377 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3378 if (Ad) *Ad = a->A; 3379 if (Ao) *Ao = a->B; 3380 if (colmap) *colmap = a->garray; 3381 PetscFunctionReturn(PETSC_SUCCESS); 3382 } 3383 3384 /* 3385 Special version for direct calls from Fortran (to eliminate two function call overheads 3386 */ 3387 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3388 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3389 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3390 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3391 #endif 3392 3393 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3394 /*@C 3395 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3396 3397 Collective 3398 3399 Input Parameters: 3400 + matin - the matrix 3401 . min - number of input rows 3402 . im - input rows 3403 . nin - number of input columns 3404 . in - input columns 3405 . v - numerical values input 3406 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3407 3408 Level: advanced 3409 3410 Developer Notes: 3411 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 
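   The conventional path that this direct call replaces is simply the following sketch, written with the parameters listed above (`MatScalar` and `PetscScalar` coincide in standard builds)
.vb
   PetscCall(MatSetValuesBlocked(*matin, *min, im, *nin, in, v, *addvin));
.ve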
3412 3413 .seealso: `Mat`, `MatSetValuesBlocked()` 3414 @*/ 3415 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3416 { 3417 /* convert input arguments to C version */ 3418 Mat mat = *matin; 3419 PetscInt m = *min, n = *nin; 3420 InsertMode addv = *addvin; 3421 3422 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3423 const MatScalar *value; 3424 MatScalar *barray = baij->barray; 3425 PetscBool roworiented = baij->roworiented; 3426 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3427 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3428 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3429 3430 PetscFunctionBegin; 3431 /* tasks normally handled by MatSetValuesBlocked() */ 3432 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3433 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3434 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3435 if (mat->assembled) { 3436 mat->was_assembled = PETSC_TRUE; 3437 mat->assembled = PETSC_FALSE; 3438 } 3439 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3440 3441 if (!barray) { 3442 PetscCall(PetscMalloc1(bs2, &barray)); 3443 baij->barray = barray; 3444 } 3445 3446 if (roworiented) stepval = (n - 1) * bs; 3447 else stepval = (m - 1) * bs; 3448 3449 for (i = 0; i < m; i++) { 3450 if (im[i] < 0) continue; 3451 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3452 if (im[i] >= rstart && im[i] < rend) { 3453 row = im[i] - rstart; 3454 for (j = 0; j < n; j++) { 3455 /* If NumCol = 1 then a copy is not required */ 3456 if ((roworiented) && (n == 1)) { 3457 barray = (MatScalar *)v + i * bs2; 3458 } else if ((!roworiented) && (m == 1)) { 3459 barray = (MatScalar *)v + j * bs2; 3460 } else { /* Here a copy is required */ 3461 if (roworiented) { 3462 value = v + i * (stepval + bs) * bs + j * bs; 3463 } else { 3464 value = v + j * (stepval + bs) * bs + i * bs; 3465 } 3466 for (ii = 0; ii < bs; ii++, value += stepval) { 3467 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3468 } 3469 barray -= bs2; 3470 } 3471 3472 if (in[j] >= cstart && in[j] < cend) { 3473 col = in[j] - cstart; 3474 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3475 } else if (in[j] < 0) { 3476 continue; 3477 } else { 3478 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3479 if (mat->was_assembled) { 3480 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3481 3482 #if defined(PETSC_USE_DEBUG) 3483 #if defined(PETSC_USE_CTABLE) 3484 { 3485 PetscInt data; 3486 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3487 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3488 } 3489 #else 3490 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3491 #endif 3492 #endif 3493 #if defined(PETSC_USE_CTABLE) 3494 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3495 col = (col - 1) / bs; 3496 #else 3497 col = (baij->colmap[in[j]] - 1) / bs; 3498 #endif 3499 if (col < 0 && 
!((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3500 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3501 col = in[j]; 3502 } 3503 } else col = in[j]; 3504 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3505 } 3506 } 3507 } else { 3508 if (!baij->donotstash) { 3509 if (roworiented) { 3510 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3511 } else { 3512 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3513 } 3514 } 3515 } 3516 } 3517 3518 /* task normally handled by MatSetValuesBlocked() */ 3519 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3520 PetscFunctionReturn(PETSC_SUCCESS); 3521 } 3522 3523 /*@ 3524 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format. 3525 3526 Collective 3527 3528 Input Parameters: 3529 + comm - MPI communicator 3530 . bs - the block size, only a block size of 1 is supported 3531 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3532 . n - This value should be the same as the local size used in creating the 3533 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3534 calculated if `N` is given) For square matrices `n` is almost always `m`. 3535 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3536 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3537 . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block entries in block row row-1 of the matrix 3538 . j - column indices 3539 - a - matrix values 3540 3541 Output Parameter: 3542 . mat - the matrix 3543 3544 Level: intermediate 3545 3546 Notes: 3547 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3548 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3549 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3550 3551 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3552 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3553 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3554 with column-major ordering within blocks. 3555 3556 The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
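   For example, the following sketch builds a 4 x 4 diagonal matrix from local block CSR arrays; the two-rank communicator, block size 1, two rows per rank, and unit values are assumptions made for this illustration only.
.vb
   Mat         A;
   PetscMPIInt rank;
   PetscInt    ia[] = {0, 1, 2};  /* i: one block entry in each of the two local rows      */
   PetscInt    ja[2];             /* j: global (block) column index of each entry          */
   PetscScalar va[] = {1.0, 1.0}; /* a: the values, one per block entry since bs = 1       */

   PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));
   ja[0] = 2 * rank;     /* first local row holds its diagonal entry  */
   ja[1] = 2 * rank + 1; /* second local row holds its diagonal entry */
   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, ia, ja, va, &A));
.ve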
3557 3558 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3559 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3560 @*/ 3561 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3562 { 3563 PetscFunctionBegin; 3564 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3565 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3566 PetscCall(MatCreate(comm, mat)); 3567 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3568 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3569 PetscCall(MatSetBlockSize(*mat, bs)); 3570 PetscCall(MatSetUp(*mat)); 3571 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3572 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3573 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3574 PetscFunctionReturn(PETSC_SUCCESS); 3575 } 3576 3577 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3578 { 3579 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3580 PetscInt *indx; 3581 PetscScalar *values; 3582 3583 PetscFunctionBegin; 3584 PetscCall(MatGetSize(inmat, &m, &N)); 3585 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3586 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3587 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3588 PetscInt *bindx, rmax = a->rmax, j; 3589 PetscMPIInt rank, size; 3590 3591 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3592 mbs = m / bs; 3593 Nbs = N / cbs; 3594 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3595 nbs = n / cbs; 3596 3597 PetscCall(PetscMalloc1(rmax, &bindx)); 3598 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3599 3600 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3601 PetscCallMPI(MPI_Comm_size(comm, &size)); 3602 if (rank == size - 1) { 3603 /* Check sum(nbs) = Nbs */ 3604 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3605 } 3606 3607 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3608 for (i = 0; i < mbs; i++) { 3609 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3610 nnz = nnz / bs; 3611 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3612 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3613 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3614 } 3615 PetscCall(PetscFree(bindx)); 3616 3617 PetscCall(MatCreate(comm, outmat)); 3618 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3619 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3620 PetscCall(MatSetType(*outmat, MATBAIJ)); 3621 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3622 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3623 MatPreallocateEnd(dnz, onz); 3624 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3625 } 3626 3627 /* numeric phase */ 3628 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3629 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3630 3631 for (i = 0; i < m; i++) { 3632 
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3633 Ii = i + rstart; 3634 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3635 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3636 } 3637 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3638 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3639 PetscFunctionReturn(PETSC_SUCCESS); 3640 } 3641
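/*
   Usage sketch (illustration only, not part of the implementation): the typical assembly sequence for a
   MATMPIBAIJ matrix using the routines defined in this file. The block size 2, the block-diagonal layout,
   the unit values, and the helper name BuildExampleBAIJ are assumptions made for this example.

     static PetscErrorCode BuildExampleBAIJ(MPI_Comm comm, Mat *A)
     {
       const PetscInt bs = 2, mbs = 4; // 4 local block rows of size 2 on every rank
       PetscInt       rstart, rend, i;
       PetscScalar    block[4] = {1.0, 1.0, 1.0, 1.0}; // one bs x bs block of values

       PetscFunctionBeginUser;
       PetscCall(MatCreate(comm, A));
       PetscCall(MatSetSizes(*A, mbs * bs, mbs * bs, PETSC_DETERMINE, PETSC_DETERMINE));
       PetscCall(MatSetType(*A, MATMPIBAIJ));
       PetscCall(MatMPIBAIJSetPreallocation(*A, bs, 1, NULL, 0, NULL)); // one diagonal block per block row
       PetscCall(MatGetOwnershipRange(*A, &rstart, &rend));
       for (i = rstart / bs; i < rend / bs; i++) PetscCall(MatSetValuesBlocked(*A, 1, &i, 1, &i, block, INSERT_VALUES));
       PetscCall(MatAssemblyBegin(*A, MAT_FINAL_ASSEMBLY));
       PetscCall(MatAssemblyEnd(*A, MAT_FINAL_ASSEMBLY));
       PetscFunctionReturn(PETSC_SUCCESS);
     }
*/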