1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)B->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp, *tmp2; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 PetscMPIInt iN; 721 722 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 723 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 col = bs * (cstart + *jj) + j; /* column index */ 729 for (row = 0; row < bs; row++) { 730 tmp[col] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 jj++; 735 } 736 v = bmat->a; 737 jj = bmat->j; 738 for (i = 0; i < bmat->nz; i++) { 739 for (j = 0; j < bs; j++) { 740 col = bs * garray[*jj] + j; 741 for (row = 0; row < bs; row++) { 742 tmp[col] += PetscAbsScalar(*v); 743 v++; 744 } 745 } 746 jj++; 747 } 748 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 749 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 750 *nrm = 0.0; 751 for (j = 0; j < mat->cmap->N; j++) { 752 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 753 } 754 PetscCall(PetscFree(tmp)); 755 PetscCall(PetscFree(tmp2)); 756 } else if (type == NORM_INFINITY) { /* max row sum */ 757 PetscReal *sums; 758 PetscCall(PetscMalloc1(bs, &sums)); 759 sum = 0.0; 760 for (j = 0; j < amat->mbs; j++) { 761 for (row = 0; row < bs; row++) sums[row] = 0.0; 762 v = amat->a + bs2 * amat->i[j]; 763 nz = amat->i[j + 1] - amat->i[j]; 764 for (i = 0; i < nz; i++) { 765 for (col = 0; col < bs; col++) { 766 for (row = 0; row < bs; row++) { 767 sums[row] += PetscAbsScalar(*v); 768 v++; 769 } 770 } 771 } 772 v = bmat->a + bs2 * bmat->i[j]; 773 nz = bmat->i[j + 1] - bmat->i[j]; 774 for (i = 0; i < nz; i++) { 775 for (col = 0; col < bs; col++) { 776 for (row = 0; row < bs; row++) { 777 sums[row] += PetscAbsScalar(*v); 778 v++; 779 } 780 } 781 } 782 for (row = 0; row < bs; row++) { 783 if (sums[row] > sum) sum = sums[row]; 784 } 785 } 786 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 787 PetscCall(PetscFree(sums)); 788 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 789 } 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 793 /* 794 Creates the hash table, and sets the table 795 This table is created only once. 796 If new entried need to be added to the matrix 797 then the hash table has to be destroyed and 798 recreated. 799 */ 800 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 803 Mat A = baij->A, B = baij->B; 804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 805 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 806 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 807 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 808 PetscInt *HT, key; 809 MatScalar **HD; 810 PetscReal tmp; 811 #if defined(PETSC_USE_INFO) 812 PetscInt ct = 0, max = 0; 813 #endif 814 815 PetscFunctionBegin; 816 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 817 818 baij->ht_size = (PetscInt)(factor * nz); 819 ht_size = baij->ht_size; 820 821 /* Allocate Memory for Hash Table */ 822 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 823 HD = baij->hd; 824 HT = baij->ht; 825 826 /* Loop Over A */ 827 for (i = 0; i < a->mbs; i++) { 828 for (j = ai[i]; j < ai[i + 1]; j++) { 829 row = i + rstart; 830 col = aj[j] + cstart; 831 832 key = row * Nbs + col + 1; 833 h1 = HASH(ht_size, key, tmp); 834 for (k = 0; k < ht_size; k++) { 835 if (!HT[(h1 + k) % ht_size]) { 836 HT[(h1 + k) % ht_size] = key; 837 HD[(h1 + k) % ht_size] = a->a + j * bs2; 838 break; 839 #if defined(PETSC_USE_INFO) 840 } else { 841 ct++; 842 #endif 843 } 844 } 845 #if defined(PETSC_USE_INFO) 846 if (k > max) max = k; 847 #endif 848 } 849 } 850 /* Loop Over B */ 851 for (i = 0; i < b->mbs; i++) { 852 for (j = bi[i]; j < bi[i + 1]; j++) { 853 row = i + rstart; 854 col = garray[bj[j]]; 855 key = row * Nbs + col + 1; 856 h1 = HASH(ht_size, key, tmp); 857 for (k = 0; k < ht_size; k++) { 858 if (!HT[(h1 + k) % ht_size]) { 859 HT[(h1 + k) % ht_size] = key; 860 HD[(h1 + k) % ht_size] = b->a + j * bs2; 861 break; 862 #if defined(PETSC_USE_INFO) 863 } else { 864 ct++; 865 #endif 866 } 867 } 868 #if defined(PETSC_USE_INFO) 869 if (k > max) max = k; 870 #endif 871 } 872 } 873 874 /* Print Summary */ 875 #if defined(PETSC_USE_INFO) 876 for (i = 0, j = 0; i < ht_size; i++) { 877 if (HT[i]) j++; 878 } 879 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max)); 880 #endif 881 PetscFunctionReturn(PETSC_SUCCESS); 882 } 883 884 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 885 { 886 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 887 PetscInt nstash, reallocs; 888 889 PetscFunctionBegin; 890 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 891 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 893 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 894 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 895 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 896 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 897 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 901 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 902 { 903 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 905 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 906 PetscInt *row, *col; 907 PetscBool r1, r2, r3, other_disassembled; 908 MatScalar *val; 909 PetscMPIInt n; 910 911 PetscFunctionBegin; 912 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 913 if (!baij->donotstash && !mat->nooffprocentries) { 914 while (1) { 915 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 916 if (!flg) break; 917 918 for (i = 0; i < n;) { 919 /* Now identify the consecutive vals belonging to the same row */ 920 for (j = i, rstart = row[j]; j < n; j++) { 921 if (row[j] != rstart) break; 922 } 923 if (j < n) ncols = j - i; 924 else ncols = n - i; 925 /* Now assemble all these values with a single function call */ 926 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 927 i = j; 928 } 929 } 930 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 931 /* Now process the block-stash. Since the values are stashed column-oriented, 932 set the row-oriented flag to column-oriented, and after MatSetValues() 933 restore the original flags */ 934 r1 = baij->roworiented; 935 r2 = a->roworiented; 936 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 937 938 baij->roworiented = PETSC_FALSE; 939 a->roworiented = PETSC_FALSE; 940 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 941 while (1) { 942 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 943 if (!flg) break; 944 945 for (i = 0; i < n;) { 946 /* Now identify the consecutive vals belonging to the same row */ 947 for (j = i, rstart = row[j]; j < n; j++) { 948 if (row[j] != rstart) break; 949 } 950 if (j < n) ncols = j - i; 951 else ncols = n - i; 952 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 953 i = j; 954 } 955 } 956 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 957 958 baij->roworiented = r1; 959 a->roworiented = r2; 960 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 961 } 962 963 PetscCall(MatAssemblyBegin(baij->A, mode)); 964 PetscCall(MatAssemblyEnd(baij->A, mode)); 965 966 /* determine if any processor has disassembled, if so we must 967 also disassemble ourselves, in order that we may reassemble. */ 968 /* 969 if nonzero structure of submatrix B cannot change then we know that 970 no processor disassembled thus we can skip this stuff 971 */ 972 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 973 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 974 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 975 } 976 977 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 978 PetscCall(MatAssemblyBegin(baij->B, mode)); 979 PetscCall(MatAssemblyEnd(baij->B, mode)); 980 981 #if defined(PETSC_USE_INFO) 982 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 983 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 984 985 baij->ht_total_ct = 0; 986 baij->ht_insert_ct = 0; 987 } 988 #endif 989 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 990 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 991 992 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 993 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 994 } 995 996 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 997 998 baij->rowvalues = NULL; 999 1000 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1001 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 1002 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1003 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1004 } 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1009 #include <petscdraw.h> 1010 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1011 { 1012 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1013 PetscMPIInt rank = baij->rank; 1014 PetscInt bs = mat->rmap->bs; 1015 PetscBool iascii, isdraw; 1016 PetscViewer sviewer; 1017 PetscViewerFormat format; 1018 1019 PetscFunctionBegin; 1020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1021 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1022 if (iascii) { 1023 PetscCall(PetscViewerGetFormat(viewer, &format)); 1024 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1025 MatInfo info; 1026 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1027 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1028 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1030 mat->rmap->bs, info.memory)); 1031 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1033 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1034 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1035 PetscCall(PetscViewerFlush(viewer)); 1036 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1037 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1038 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1041 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1044 PetscFunctionReturn(PETSC_SUCCESS); 1045 } 1046 } 1047 1048 if (isdraw) { 1049 PetscDraw draw; 1050 PetscBool isnull; 1051 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1052 PetscCall(PetscDrawIsNull(draw, &isnull)); 1053 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 { 1057 /* assemble the entire matrix onto first processor. */ 1058 Mat A; 1059 Mat_SeqBAIJ *Aloc; 1060 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1061 MatScalar *a; 1062 const char *matname; 1063 1064 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1065 /* Perhaps this should be the type of mat? */ 1066 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1067 if (rank == 0) { 1068 PetscCall(MatSetSizes(A, M, N, M, N)); 1069 } else { 1070 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1071 } 1072 PetscCall(MatSetType(A, MATMPIBAIJ)); 1073 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1074 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1075 1076 /* copy over the A part */ 1077 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1078 ai = Aloc->i; 1079 aj = Aloc->j; 1080 a = Aloc->a; 1081 PetscCall(PetscMalloc1(bs, &rvals)); 1082 1083 for (i = 0; i < mbs; i++) { 1084 rvals[0] = bs * (baij->rstartbs + i); 1085 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1086 for (j = ai[i]; j < ai[i + 1]; j++) { 1087 col = (baij->cstartbs + aj[j]) * bs; 1088 for (k = 0; k < bs; k++) { 1089 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1090 col++; 1091 a += bs; 1092 } 1093 } 1094 } 1095 /* copy over the B part */ 1096 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1097 ai = Aloc->i; 1098 aj = Aloc->j; 1099 a = Aloc->a; 1100 for (i = 0; i < mbs; i++) { 1101 rvals[0] = bs * (baij->rstartbs + i); 1102 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1103 for (j = ai[i]; j < ai[i + 1]; j++) { 1104 col = baij->garray[aj[j]] * bs; 1105 for (k = 0; k < bs; k++) { 1106 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1107 col++; 1108 a += bs; 1109 } 1110 } 1111 } 1112 PetscCall(PetscFree(rvals)); 1113 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1114 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1115 /* 1116 Everyone has to call to draw the matrix since the graphics waits are 1117 synchronized across all processors that share the PetscDraw object 1118 */ 1119 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1120 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1121 if (rank == 0) { 1122 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1123 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1124 } 1125 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1126 PetscCall(MatDestroy(&A)); 1127 } 1128 PetscFunctionReturn(PETSC_SUCCESS); 1129 } 1130 1131 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1132 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1133 { 1134 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1135 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1136 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1137 const PetscInt *garray = aij->garray; 1138 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1139 PetscCount nz, hnz; 1140 PetscInt *rowlens, *colidxs; 1141 PetscScalar *matvals; 1142 PetscMPIInt rank; 1143 1144 PetscFunctionBegin; 1145 PetscCall(PetscViewerSetUp(viewer)); 1146 1147 M = mat->rmap->N; 1148 N = mat->cmap->N; 1149 m = mat->rmap->n; 1150 rs = mat->rmap->rstart; 1151 cs = mat->cmap->rstart; 1152 bs = mat->rmap->bs; 1153 nz = bs * bs * (A->nz + B->nz); 1154 1155 /* write matrix header */ 1156 header[0] = MAT_FILE_CLASSID; 1157 header[1] = M; 1158 header[2] = N; 1159 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1160 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1161 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1162 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1163 1164 /* fill in and store row lengths */ 1165 PetscCall(PetscMalloc1(m, &rowlens)); 1166 for (cnt = 0, i = 0; i < A->mbs; i++) 1167 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1168 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1169 PetscCall(PetscFree(rowlens)); 1170 1171 /* fill in and store column indices */ 1172 PetscCall(PetscMalloc1(nz, &colidxs)); 1173 for (cnt = 0, i = 0; i < A->mbs; i++) { 1174 for (k = 0; k < bs; k++) { 1175 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1176 if (garray[B->j[jb]] > cs / bs) break; 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1178 } 1179 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1180 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1181 for (; jb < B->i[i + 1]; jb++) 1182 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1183 } 1184 } 1185 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1186 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1187 PetscCall(PetscFree(colidxs)); 1188 1189 /* fill in and store nonzero values */ 1190 PetscCall(PetscMalloc1(nz, &matvals)); 1191 for (cnt = 0, i = 0; i < A->mbs; i++) { 1192 for (k = 0; k < bs; k++) { 1193 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1194 if (garray[B->j[jb]] > cs / bs) break; 1195 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1196 } 1197 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1198 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1199 for (; jb < B->i[i + 1]; jb++) 1200 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1201 } 1202 } 1203 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1204 PetscCall(PetscFree(matvals)); 1205 1206 /* write block size option to the viewer's .info file */ 1207 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1208 PetscFunctionReturn(PETSC_SUCCESS); 1209 } 1210 1211 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1212 { 1213 PetscBool iascii, isdraw, issocket, isbinary; 1214 1215 PetscFunctionBegin; 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1219 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1220 if (iascii || isdraw || issocket) { 1221 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1222 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1223 PetscFunctionReturn(PETSC_SUCCESS); 1224 } 1225 1226 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1227 { 1228 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1229 PetscInt nt; 1230 1231 PetscFunctionBegin; 1232 PetscCall(VecGetLocalSize(xx, &nt)); 1233 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1234 PetscCall(VecGetLocalSize(yy, &nt)); 1235 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1236 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1237 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1238 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1239 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1244 { 1245 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1246 1247 PetscFunctionBegin; 1248 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1249 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1250 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1251 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1256 { 1257 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1258 1259 PetscFunctionBegin; 1260 /* do nondiagonal part */ 1261 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1262 /* do local part */ 1263 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1264 /* add partial results together */ 1265 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1266 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1267 PetscFunctionReturn(PETSC_SUCCESS); 1268 } 1269 1270 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1271 { 1272 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1273 1274 PetscFunctionBegin; 1275 /* do nondiagonal part */ 1276 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1277 /* do local part */ 1278 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1279 /* add partial results together */ 1280 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1281 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 1285 /* 1286 This only works correctly for square matrices where the subblock A->A is the 1287 diagonal block 1288 */ 1289 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1290 { 1291 PetscFunctionBegin; 1292 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1293 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1294 PetscFunctionReturn(PETSC_SUCCESS); 1295 } 1296 1297 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1298 { 1299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1300 1301 PetscFunctionBegin; 1302 PetscCall(MatScale(a->A, aa)); 1303 PetscCall(MatScale(a->B, aa)); 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306 1307 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1308 { 1309 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1310 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1311 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1312 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1313 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1314 1315 PetscFunctionBegin; 1316 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1317 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1318 mat->getrowactive = PETSC_TRUE; 1319 1320 if (!mat->rowvalues && (idx || v)) { 1321 /* 1322 allocate enough space to hold information from the longest row. 1323 */ 1324 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1325 PetscInt max = 1, mbs = mat->mbs, tmp; 1326 for (i = 0; i < mbs; i++) { 1327 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1328 if (max < tmp) max = tmp; 1329 } 1330 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1331 } 1332 lrow = row - brstart; 1333 1334 pvA = &vworkA; 1335 pcA = &cworkA; 1336 pvB = &vworkB; 1337 pcB = &cworkB; 1338 if (!v) { 1339 pvA = NULL; 1340 pvB = NULL; 1341 } 1342 if (!idx) { 1343 pcA = NULL; 1344 if (!v) pcB = NULL; 1345 } 1346 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1347 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1348 nztot = nzA + nzB; 1349 1350 cmap = mat->garray; 1351 if (v || idx) { 1352 if (nztot) { 1353 /* Sort by increasing column numbers, assuming A and B already sorted */ 1354 PetscInt imark = -1; 1355 if (v) { 1356 *v = v_p = mat->rowvalues; 1357 for (i = 0; i < nzB; i++) { 1358 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1359 else break; 1360 } 1361 imark = i; 1362 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1363 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1364 } 1365 if (idx) { 1366 *idx = idx_p = mat->rowindices; 1367 if (imark > -1) { 1368 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 } else { 1370 for (i = 0; i < nzB; i++) { 1371 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1372 else break; 1373 } 1374 imark = i; 1375 } 1376 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1377 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1378 } 1379 } else { 1380 if (idx) *idx = NULL; 1381 if (v) *v = NULL; 1382 } 1383 } 1384 *nz = nztot; 1385 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1386 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1387 PetscFunctionReturn(PETSC_SUCCESS); 1388 } 1389 1390 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1391 { 1392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1393 1394 PetscFunctionBegin; 1395 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1396 baij->getrowactive = PETSC_FALSE; 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1401 { 1402 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1403 1404 PetscFunctionBegin; 1405 PetscCall(MatZeroEntries(l->A)); 1406 PetscCall(MatZeroEntries(l->B)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1413 Mat A = a->A, B = a->B; 1414 PetscLogDouble isend[5], irecv[5]; 1415 1416 PetscFunctionBegin; 1417 info->block_size = (PetscReal)matin->rmap->bs; 1418 1419 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1420 1421 isend[0] = info->nz_used; 1422 isend[1] = info->nz_allocated; 1423 isend[2] = info->nz_unneeded; 1424 isend[3] = info->memory; 1425 isend[4] = info->mallocs; 1426 1427 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1428 1429 isend[0] += info->nz_used; 1430 isend[1] += info->nz_allocated; 1431 isend[2] += info->nz_unneeded; 1432 isend[3] += info->memory; 1433 isend[4] += info->mallocs; 1434 1435 if (flag == MAT_LOCAL) { 1436 info->nz_used = isend[0]; 1437 info->nz_allocated = isend[1]; 1438 info->nz_unneeded = isend[2]; 1439 info->memory = isend[3]; 1440 info->mallocs = isend[4]; 1441 } else if (flag == MAT_GLOBAL_MAX) { 1442 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1443 1444 info->nz_used = irecv[0]; 1445 info->nz_allocated = irecv[1]; 1446 info->nz_unneeded = irecv[2]; 1447 info->memory = irecv[3]; 1448 info->mallocs = irecv[4]; 1449 } else if (flag == MAT_GLOBAL_SUM) { 1450 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1451 1452 info->nz_used = irecv[0]; 1453 info->nz_allocated = irecv[1]; 1454 info->nz_unneeded = irecv[2]; 1455 info->memory = irecv[3]; 1456 info->mallocs = irecv[4]; 1457 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1458 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1459 info->fill_ratio_needed = 0; 1460 info->factor_mallocs = 0; 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1465 { 1466 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1467 1468 PetscFunctionBegin; 1469 switch (op) { 1470 case MAT_NEW_NONZERO_LOCATIONS: 1471 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 case MAT_KEEP_NONZERO_PATTERN: 1474 case MAT_NEW_NONZERO_LOCATION_ERR: 1475 MatCheckPreallocated(A, 1); 1476 PetscCall(MatSetOption(a->A, op, flg)); 1477 PetscCall(MatSetOption(a->B, op, flg)); 1478 break; 1479 case MAT_ROW_ORIENTED: 1480 MatCheckPreallocated(A, 1); 1481 a->roworiented = flg; 1482 1483 PetscCall(MatSetOption(a->A, op, flg)); 1484 PetscCall(MatSetOption(a->B, op, flg)); 1485 break; 1486 case MAT_FORCE_DIAGONAL_ENTRIES: 1487 case MAT_SORTED_FULL: 1488 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1489 break; 1490 case MAT_IGNORE_OFF_PROC_ENTRIES: 1491 a->donotstash = flg; 1492 break; 1493 case MAT_USE_HASH_TABLE: 1494 a->ht_flag = flg; 1495 a->ht_fact = 1.39; 1496 break; 1497 case MAT_SYMMETRIC: 1498 case MAT_STRUCTURALLY_SYMMETRIC: 1499 case MAT_HERMITIAN: 1500 case MAT_SUBMAT_SINGLEIS: 1501 case MAT_SYMMETRY_ETERNAL: 1502 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1503 case MAT_SPD_ETERNAL: 1504 /* if the diagonal matrix is square it inherits some of the properties above */ 1505 break; 1506 default: 1507 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1508 } 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1513 { 1514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1515 Mat_SeqBAIJ *Aloc; 1516 Mat B; 1517 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1518 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1519 MatScalar *a; 1520 1521 PetscFunctionBegin; 1522 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1523 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1524 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1525 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1526 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1527 /* Do not know preallocation information, but must set block size */ 1528 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1529 } else { 1530 B = *matout; 1531 } 1532 1533 /* copy over the A part */ 1534 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1535 ai = Aloc->i; 1536 aj = Aloc->j; 1537 a = Aloc->a; 1538 PetscCall(PetscMalloc1(bs, &rvals)); 1539 1540 for (i = 0; i < mbs; i++) { 1541 rvals[0] = bs * (baij->rstartbs + i); 1542 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1543 for (j = ai[i]; j < ai[i + 1]; j++) { 1544 col = (baij->cstartbs + aj[j]) * bs; 1545 for (k = 0; k < bs; k++) { 1546 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1547 1548 col++; 1549 a += bs; 1550 } 1551 } 1552 } 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1555 ai = Aloc->i; 1556 aj = Aloc->j; 1557 a = Aloc->a; 1558 for (i = 0; i < mbs; i++) { 1559 rvals[0] = bs * (baij->rstartbs + i); 1560 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1561 for (j = ai[i]; j < ai[i + 1]; j++) { 1562 col = baij->garray[aj[j]] * bs; 1563 for (k = 0; k < bs; k++) { 1564 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1565 col++; 1566 a += bs; 1567 } 1568 } 1569 } 1570 PetscCall(PetscFree(rvals)); 1571 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1572 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1573 1574 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1575 else PetscCall(MatHeaderMerge(A, &B)); 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1580 { 1581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1582 Mat a = baij->A, b = baij->B; 1583 PetscInt s1, s2, s3; 1584 1585 PetscFunctionBegin; 1586 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1587 if (rr) { 1588 PetscCall(VecGetLocalSize(rr, &s1)); 1589 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1590 /* Overlap communication with computation. */ 1591 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1592 } 1593 if (ll) { 1594 PetscCall(VecGetLocalSize(ll, &s1)); 1595 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1596 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1597 } 1598 /* scale the diagonal block */ 1599 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1600 1601 if (rr) { 1602 /* Do a scatter end and then right scale the off-diagonal block */ 1603 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1604 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1605 } 1606 PetscFunctionReturn(PETSC_SUCCESS); 1607 } 1608 1609 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1610 { 1611 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1612 PetscInt *lrows; 1613 PetscInt r, len; 1614 PetscBool cong; 1615 1616 PetscFunctionBegin; 1617 /* get locally owned rows */ 1618 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1619 /* fix right-hand side if needed */ 1620 if (x && b) { 1621 const PetscScalar *xx; 1622 PetscScalar *bb; 1623 1624 PetscCall(VecGetArrayRead(x, &xx)); 1625 PetscCall(VecGetArray(b, &bb)); 1626 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1627 PetscCall(VecRestoreArrayRead(x, &xx)); 1628 PetscCall(VecRestoreArray(b, &bb)); 1629 } 1630 1631 /* actually zap the local rows */ 1632 /* 1633 Zero the required rows. If the "diagonal block" of the matrix 1634 is square and the user wishes to set the diagonal we use separate 1635 code so that MatSetValues() is not called for each diagonal allocating 1636 new memory, thus calling lots of mallocs and slowing things down. 1637 1638 */ 1639 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1640 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1641 PetscCall(MatHasCongruentLayouts(A, &cong)); 1642 if ((diag != 0.0) && cong) { 1643 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1644 } else if (diag != 0.0) { 1645 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1646 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1647 for (r = 0; r < len; ++r) { 1648 const PetscInt row = lrows[r] + A->rmap->rstart; 1649 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1650 } 1651 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1653 } else { 1654 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1655 } 1656 PetscCall(PetscFree(lrows)); 1657 1658 /* only change matrix nonzero state if pattern was allowed to be changed */ 1659 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1660 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1661 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1662 } 1663 PetscFunctionReturn(PETSC_SUCCESS); 1664 } 1665 1666 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1667 { 1668 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1669 PetscMPIInt n, p = 0; 1670 PetscInt i, j, k, r, len = 0, row, col, count; 1671 PetscInt *lrows, *owners = A->rmap->range; 1672 PetscSFNode *rrows; 1673 PetscSF sf; 1674 const PetscScalar *xx; 1675 PetscScalar *bb, *mask; 1676 Vec xmask, lmask; 1677 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1678 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1679 PetscScalar *aa; 1680 1681 PetscFunctionBegin; 1682 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1683 /* Create SF where leaves are input rows and roots are owned rows */ 1684 PetscCall(PetscMalloc1(n, &lrows)); 1685 for (r = 0; r < n; ++r) lrows[r] = -1; 1686 PetscCall(PetscMalloc1(N, &rrows)); 1687 for (r = 0; r < N; ++r) { 1688 const PetscInt idx = rows[r]; 1689 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1690 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1691 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1692 } 1693 rrows[r].rank = p; 1694 rrows[r].index = rows[r] - owners[p]; 1695 } 1696 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1697 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1698 /* Collect flags for rows to be zeroed */ 1699 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1700 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1701 PetscCall(PetscSFDestroy(&sf)); 1702 /* Compress and put in row numbers */ 1703 for (r = 0; r < n; ++r) 1704 if (lrows[r] >= 0) lrows[len++] = r; 1705 /* zero diagonal part of matrix */ 1706 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1707 /* handle off-diagonal part of matrix */ 1708 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1709 PetscCall(VecDuplicate(l->lvec, &lmask)); 1710 PetscCall(VecGetArray(xmask, &bb)); 1711 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1712 PetscCall(VecRestoreArray(xmask, &bb)); 1713 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecDestroy(&xmask)); 1716 if (x) { 1717 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1718 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1719 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1720 PetscCall(VecGetArray(b, &bb)); 1721 } 1722 PetscCall(VecGetArray(lmask, &mask)); 1723 /* remove zeroed rows of off-diagonal matrix */ 1724 for (i = 0; i < len; ++i) { 1725 row = lrows[i]; 1726 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1727 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1728 for (k = 0; k < count; ++k) { 1729 aa[0] = 0.0; 1730 aa += bs; 1731 } 1732 } 1733 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1734 for (i = 0; i < l->B->rmap->N; ++i) { 1735 row = i / bs; 1736 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1737 for (k = 0; k < bs; ++k) { 1738 col = bs * baij->j[j] + k; 1739 if (PetscAbsScalar(mask[col])) { 1740 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1741 if (x) bb[i] -= aa[0] * xx[col]; 1742 aa[0] = 0.0; 1743 } 1744 } 1745 } 1746 } 1747 if (x) { 1748 PetscCall(VecRestoreArray(b, &bb)); 1749 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1750 } 1751 PetscCall(VecRestoreArray(lmask, &mask)); 1752 PetscCall(VecDestroy(&lmask)); 1753 PetscCall(PetscFree(lrows)); 1754 1755 /* only change matrix nonzero state if pattern was allowed to be changed */ 1756 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1757 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1758 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1759 } 1760 PetscFunctionReturn(PETSC_SUCCESS); 1761 } 1762 1763 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1764 { 1765 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1766 1767 PetscFunctionBegin; 1768 PetscCall(MatSetUnfactored(a->A)); 1769 PetscFunctionReturn(PETSC_SUCCESS); 1770 } 1771 1772 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1773 1774 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1775 { 1776 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1777 Mat a, b, c, d; 1778 PetscBool flg; 1779 1780 PetscFunctionBegin; 1781 a = matA->A; 1782 b = matA->B; 1783 c = matB->A; 1784 d = matB->B; 1785 1786 PetscCall(MatEqual(a, c, &flg)); 1787 if (flg) PetscCall(MatEqual(b, d, &flg)); 1788 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1789 PetscFunctionReturn(PETSC_SUCCESS); 1790 } 1791 1792 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1793 { 1794 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1795 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 PetscCall(MatCopy_Basic(A, B, str)); 1801 } else { 1802 PetscCall(MatCopy(a->A, b->A, str)); 1803 PetscCall(MatCopy(a->B, b->B, str)); 1804 } 1805 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1806 PetscFunctionReturn(PETSC_SUCCESS); 1807 } 1808 1809 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1810 { 1811 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1812 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1813 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1814 1815 PetscFunctionBegin; 1816 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1817 PetscFunctionReturn(PETSC_SUCCESS); 1818 } 1819 1820 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1821 { 1822 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1823 PetscBLASInt bnz, one = 1; 1824 Mat_SeqBAIJ *x, *y; 1825 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1826 1827 PetscFunctionBegin; 1828 if (str == SAME_NONZERO_PATTERN) { 1829 PetscScalar alpha = a; 1830 x = (Mat_SeqBAIJ *)xx->A->data; 1831 y = (Mat_SeqBAIJ *)yy->A->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 x = (Mat_SeqBAIJ *)xx->B->data; 1835 y = (Mat_SeqBAIJ *)yy->B->data; 1836 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1837 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1838 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1839 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1840 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1841 } else { 1842 Mat B; 1843 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1844 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1845 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1846 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1847 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1848 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1849 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1850 PetscCall(MatSetType(B, MATMPIBAIJ)); 1851 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1852 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1853 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1854 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1855 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1856 PetscCall(MatHeaderMerge(Y, &B)); 1857 PetscCall(PetscFree(nnz_d)); 1858 PetscCall(PetscFree(nnz_o)); 1859 } 1860 PetscFunctionReturn(PETSC_SUCCESS); 1861 } 1862 1863 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1864 { 1865 PetscFunctionBegin; 1866 if (PetscDefined(USE_COMPLEX)) { 1867 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1868 1869 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1870 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1871 } 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 1875 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1876 { 1877 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1878 1879 PetscFunctionBegin; 1880 PetscCall(MatRealPart(a->A)); 1881 PetscCall(MatRealPart(a->B)); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1886 { 1887 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1888 1889 PetscFunctionBegin; 1890 PetscCall(MatImaginaryPart(a->A)); 1891 PetscCall(MatImaginaryPart(a->B)); 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1896 { 1897 IS iscol_local; 1898 PetscInt csize; 1899 1900 PetscFunctionBegin; 1901 PetscCall(ISGetLocalSize(iscol, &csize)); 1902 if (call == MAT_REUSE_MATRIX) { 1903 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1904 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1905 } else { 1906 PetscCall(ISAllGather(iscol, &iscol_local)); 1907 } 1908 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1909 if (call == MAT_INITIAL_MATRIX) { 1910 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1911 PetscCall(ISDestroy(&iscol_local)); 1912 } 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 /* 1917 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1918 in local and then by concatenating the local matrices the end result. 1919 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1920 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1921 */ 1922 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1923 { 1924 PetscMPIInt rank, size; 1925 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1926 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1927 Mat M, Mreuse; 1928 MatScalar *vwork, *aa; 1929 MPI_Comm comm; 1930 IS isrow_new, iscol_new; 1931 Mat_SeqBAIJ *aij; 1932 1933 PetscFunctionBegin; 1934 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1935 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1936 PetscCallMPI(MPI_Comm_size(comm, &size)); 1937 /* The compression and expansion should be avoided. Doesn't point 1938 out errors, might change the indices, hence buggey */ 1939 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1940 if (isrow == iscol) { 1941 iscol_new = isrow_new; 1942 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1943 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1944 1945 if (call == MAT_REUSE_MATRIX) { 1946 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1947 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1948 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1949 } else { 1950 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1951 } 1952 PetscCall(ISDestroy(&isrow_new)); 1953 PetscCall(ISDestroy(&iscol_new)); 1954 /* 1955 m - number of local rows 1956 n - number of columns (same on all processors) 1957 rstart - first row in new global matrix generated 1958 */ 1959 PetscCall(MatGetBlockSize(mat, &bs)); 1960 PetscCall(MatGetSize(Mreuse, &m, &n)); 1961 m = m / bs; 1962 n = n / bs; 1963 1964 if (call == MAT_INITIAL_MATRIX) { 1965 aij = (Mat_SeqBAIJ *)Mreuse->data; 1966 ii = aij->i; 1967 jj = aij->j; 1968 1969 /* 1970 Determine the number of non-zeros in the diagonal and off-diagonal 1971 portions of the matrix in order to do correct preallocation 1972 */ 1973 1974 /* first get start and end of "diagonal" columns */ 1975 if (csize == PETSC_DECIDE) { 1976 PetscCall(ISGetSize(isrow, &mglobal)); 1977 if (mglobal == n * bs) { /* square matrix */ 1978 nlocal = m; 1979 } else { 1980 nlocal = n / size + ((n % size) > rank); 1981 } 1982 } else { 1983 nlocal = csize / bs; 1984 } 1985 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1986 rstart = rend - nlocal; 1987 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1988 1989 /* next, compute all the lengths */ 1990 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1991 for (i = 0; i < m; i++) { 1992 jend = ii[i + 1] - ii[i]; 1993 olen = 0; 1994 dlen = 0; 1995 for (j = 0; j < jend; j++) { 1996 if (*jj < rstart || *jj >= rend) olen++; 1997 else dlen++; 1998 jj++; 1999 } 2000 olens[i] = olen; 2001 dlens[i] = dlen; 2002 } 2003 PetscCall(MatCreate(comm, &M)); 2004 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2005 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2006 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2007 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2008 PetscCall(PetscFree2(dlens, olens)); 2009 } else { 2010 PetscInt ml, nl; 2011 2012 M = *newmat; 2013 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2014 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2015 PetscCall(MatZeroEntries(M)); 2016 /* 2017 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2018 rather than the slower MatSetValues(). 2019 */ 2020 M->was_assembled = PETSC_TRUE; 2021 M->assembled = PETSC_FALSE; 2022 } 2023 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2024 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2025 aij = (Mat_SeqBAIJ *)Mreuse->data; 2026 ii = aij->i; 2027 jj = aij->j; 2028 aa = aij->a; 2029 for (i = 0; i < m; i++) { 2030 row = rstart / bs + i; 2031 nz = ii[i + 1] - ii[i]; 2032 cwork = jj; 2033 jj = PetscSafePointerPlusOffset(jj, nz); 2034 vwork = aa; 2035 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2036 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2037 } 2038 2039 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2040 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2041 *newmat = M; 2042 2043 /* save submatrix used in processor for next request */ 2044 if (call == MAT_INITIAL_MATRIX) { 2045 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2046 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2047 } 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2052 { 2053 MPI_Comm comm, pcomm; 2054 PetscInt clocal_size, nrows; 2055 const PetscInt *rows; 2056 PetscMPIInt size; 2057 IS crowp, lcolp; 2058 2059 PetscFunctionBegin; 2060 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2061 /* make a collective version of 'rowp' */ 2062 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2063 if (pcomm == comm) { 2064 crowp = rowp; 2065 } else { 2066 PetscCall(ISGetSize(rowp, &nrows)); 2067 PetscCall(ISGetIndices(rowp, &rows)); 2068 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2069 PetscCall(ISRestoreIndices(rowp, &rows)); 2070 } 2071 PetscCall(ISSetPermutation(crowp)); 2072 /* make a local version of 'colp' */ 2073 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2074 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2075 if (size == 1) { 2076 lcolp = colp; 2077 } else { 2078 PetscCall(ISAllGather(colp, &lcolp)); 2079 } 2080 PetscCall(ISSetPermutation(lcolp)); 2081 /* now we just get the submatrix */ 2082 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2083 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2084 /* clean up */ 2085 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2086 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2087 PetscFunctionReturn(PETSC_SUCCESS); 2088 } 2089 2090 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2091 { 2092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2093 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2094 2095 PetscFunctionBegin; 2096 if (nghosts) *nghosts = B->nbs; 2097 if (ghosts) *ghosts = baij->garray; 2098 PetscFunctionReturn(PETSC_SUCCESS); 2099 } 2100 2101 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2102 { 2103 Mat B; 2104 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2105 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2106 Mat_SeqAIJ *b; 2107 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2108 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2109 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2110 2111 PetscFunctionBegin; 2112 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2113 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2114 2115 /* Tell every processor the number of nonzeros per row */ 2116 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2117 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2118 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2119 displs = recvcounts + size; 2120 for (i = 0; i < size; i++) { 2121 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2122 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2123 } 2124 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2125 /* Create the sequential matrix of the same type as the local block diagonal */ 2126 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2127 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2128 PetscCall(MatSetType(B, MATSEQAIJ)); 2129 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2130 b = (Mat_SeqAIJ *)B->data; 2131 2132 /* Copy my part of matrix column indices over */ 2133 sendcount = ad->nz + bd->nz; 2134 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2135 a_jsendbuf = ad->j; 2136 b_jsendbuf = bd->j; 2137 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2138 cnt = 0; 2139 for (i = 0; i < n; i++) { 2140 /* put in lower diagonal portion */ 2141 m = bd->i[i + 1] - bd->i[i]; 2142 while (m > 0) { 2143 /* is it above diagonal (in bd (compressed) numbering) */ 2144 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2145 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2146 m--; 2147 } 2148 2149 /* put in diagonal portion */ 2150 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2151 2152 /* put in upper diagonal portion */ 2153 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2154 } 2155 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2156 2157 /* Gather all column indices to all processors */ 2158 for (i = 0; i < size; i++) { 2159 recvcounts[i] = 0; 2160 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2161 } 2162 displs[0] = 0; 2163 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2164 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2165 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2166 /* set the b->ilen (length of each row) values */ 2167 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2168 /* set the b->i indices */ 2169 b->i[0] = 0; 2170 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2171 PetscCall(PetscFree(lens)); 2172 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2173 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2174 PetscCall(PetscFree(recvcounts)); 2175 2176 PetscCall(MatPropagateSymmetryOptions(A, B)); 2177 *newmat = B; 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2182 { 2183 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2184 Vec bb1 = NULL; 2185 2186 PetscFunctionBegin; 2187 if (flag == SOR_APPLY_UPPER) { 2188 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 2192 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2193 2194 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2195 if (flag & SOR_ZERO_INITIAL_GUESS) { 2196 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2197 its--; 2198 } 2199 2200 while (its--) { 2201 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2202 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2203 2204 /* update rhs: bb1 = bb - B*x */ 2205 PetscCall(VecScale(mat->lvec, -1.0)); 2206 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2207 2208 /* local sweep */ 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2210 } 2211 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2212 if (flag & SOR_ZERO_INITIAL_GUESS) { 2213 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2214 its--; 2215 } 2216 while (its--) { 2217 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2218 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2219 2220 /* update rhs: bb1 = bb - B*x */ 2221 PetscCall(VecScale(mat->lvec, -1.0)); 2222 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2223 2224 /* local sweep */ 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2226 } 2227 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2228 if (flag & SOR_ZERO_INITIAL_GUESS) { 2229 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2230 its--; 2231 } 2232 while (its--) { 2233 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2234 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2235 2236 /* update rhs: bb1 = bb - B*x */ 2237 PetscCall(VecScale(mat->lvec, -1.0)); 2238 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2239 2240 /* local sweep */ 2241 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2242 } 2243 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2244 2245 PetscCall(VecDestroy(&bb1)); 2246 PetscFunctionReturn(PETSC_SUCCESS); 2247 } 2248 2249 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2250 { 2251 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2252 PetscInt m, N, i, *garray = aij->garray; 2253 PetscInt ib, jb, bs = A->rmap->bs; 2254 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2255 MatScalar *a_val = a_aij->a; 2256 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2257 MatScalar *b_val = b_aij->a; 2258 PetscReal *work; 2259 PetscMPIInt iN; 2260 2261 PetscFunctionBegin; 2262 PetscCall(MatGetSize(A, &m, &N)); 2263 PetscCall(PetscCalloc1(N, &work)); 2264 if (type == NORM_2) { 2265 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2269 a_val++; 2270 } 2271 } 2272 } 2273 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2274 for (jb = 0; jb < bs; jb++) { 2275 for (ib = 0; ib < bs; ib++) { 2276 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2277 b_val++; 2278 } 2279 } 2280 } 2281 } else if (type == NORM_1) { 2282 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2286 a_val++; 2287 } 2288 } 2289 } 2290 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2291 for (jb = 0; jb < bs; jb++) { 2292 for (ib = 0; ib < bs; ib++) { 2293 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2294 b_val++; 2295 } 2296 } 2297 } 2298 } else if (type == NORM_INFINITY) { 2299 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2303 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2304 a_val++; 2305 } 2306 } 2307 } 2308 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2313 b_val++; 2314 } 2315 } 2316 } 2317 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2318 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2319 for (jb = 0; jb < bs; jb++) { 2320 for (ib = 0; ib < bs; ib++) { 2321 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2322 a_val++; 2323 } 2324 } 2325 } 2326 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2327 for (jb = 0; jb < bs; jb++) { 2328 for (ib = 0; ib < bs; ib++) { 2329 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2330 b_val++; 2331 } 2332 } 2333 } 2334 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2335 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2344 for (jb = 0; jb < bs; jb++) { 2345 for (ib = 0; ib < bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2352 PetscCall(PetscMPIIntCast(N, &iN)); 2353 if (type == NORM_INFINITY) { 2354 PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2355 } else { 2356 PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2357 } 2358 PetscCall(PetscFree(work)); 2359 if (type == NORM_2) { 2360 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2361 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2362 for (i = 0; i < N; i++) reductions[i] /= m; 2363 } 2364 PetscFunctionReturn(PETSC_SUCCESS); 2365 } 2366 2367 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2368 { 2369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2370 2371 PetscFunctionBegin; 2372 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2373 A->factorerrortype = a->A->factorerrortype; 2374 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2375 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2380 { 2381 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2382 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2383 2384 PetscFunctionBegin; 2385 if (!Y->preallocated) { 2386 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2387 } else if (!aij->nz) { 2388 PetscInt nonew = aij->nonew; 2389 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2390 aij->nonew = nonew; 2391 } 2392 PetscCall(MatShift_Basic(Y, a)); 2393 PetscFunctionReturn(PETSC_SUCCESS); 2394 } 2395 2396 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2397 { 2398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2399 2400 PetscFunctionBegin; 2401 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2402 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2403 if (d) { 2404 PetscInt rstart; 2405 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2406 *d += rstart / A->rmap->bs; 2407 } 2408 PetscFunctionReturn(PETSC_SUCCESS); 2409 } 2410 2411 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2412 { 2413 PetscFunctionBegin; 2414 *a = ((Mat_MPIBAIJ *)A->data)->A; 2415 PetscFunctionReturn(PETSC_SUCCESS); 2416 } 2417 2418 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2419 { 2420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2421 2422 PetscFunctionBegin; 2423 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2424 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2425 PetscFunctionReturn(PETSC_SUCCESS); 2426 } 2427 2428 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2429 MatGetRow_MPIBAIJ, 2430 MatRestoreRow_MPIBAIJ, 2431 MatMult_MPIBAIJ, 2432 /* 4*/ MatMultAdd_MPIBAIJ, 2433 MatMultTranspose_MPIBAIJ, 2434 MatMultTransposeAdd_MPIBAIJ, 2435 NULL, 2436 NULL, 2437 NULL, 2438 /*10*/ NULL, 2439 NULL, 2440 NULL, 2441 MatSOR_MPIBAIJ, 2442 MatTranspose_MPIBAIJ, 2443 /*15*/ MatGetInfo_MPIBAIJ, 2444 MatEqual_MPIBAIJ, 2445 MatGetDiagonal_MPIBAIJ, 2446 MatDiagonalScale_MPIBAIJ, 2447 MatNorm_MPIBAIJ, 2448 /*20*/ MatAssemblyBegin_MPIBAIJ, 2449 MatAssemblyEnd_MPIBAIJ, 2450 MatSetOption_MPIBAIJ, 2451 MatZeroEntries_MPIBAIJ, 2452 /*24*/ MatZeroRows_MPIBAIJ, 2453 NULL, 2454 NULL, 2455 NULL, 2456 NULL, 2457 /*29*/ MatSetUp_MPI_Hash, 2458 NULL, 2459 NULL, 2460 MatGetDiagonalBlock_MPIBAIJ, 2461 NULL, 2462 /*34*/ MatDuplicate_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 NULL, 2466 NULL, 2467 /*39*/ MatAXPY_MPIBAIJ, 2468 MatCreateSubMatrices_MPIBAIJ, 2469 MatIncreaseOverlap_MPIBAIJ, 2470 MatGetValues_MPIBAIJ, 2471 MatCopy_MPIBAIJ, 2472 /*44*/ NULL, 2473 MatScale_MPIBAIJ, 2474 MatShift_MPIBAIJ, 2475 NULL, 2476 MatZeroRowsColumns_MPIBAIJ, 2477 /*49*/ NULL, 2478 NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 /*54*/ MatFDColoringCreate_MPIXAIJ, 2483 NULL, 2484 MatSetUnfactored_MPIBAIJ, 2485 MatPermute_MPIBAIJ, 2486 MatSetValuesBlocked_MPIBAIJ, 2487 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2488 MatDestroy_MPIBAIJ, 2489 MatView_MPIBAIJ, 2490 NULL, 2491 NULL, 2492 /*64*/ NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*74*/ NULL, 2503 MatFDColoringApply_BAIJ, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*79*/ NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 MatLoad_MPIBAIJ, 2512 /*84*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 /*89*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*94*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*99*/ NULL, 2528 NULL, 2529 NULL, 2530 MatConjugate_MPIBAIJ, 2531 NULL, 2532 /*104*/ NULL, 2533 MatRealPart_MPIBAIJ, 2534 MatImaginaryPart_MPIBAIJ, 2535 NULL, 2536 NULL, 2537 /*109*/ NULL, 2538 NULL, 2539 NULL, 2540 NULL, 2541 MatMissingDiagonal_MPIBAIJ, 2542 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2543 NULL, 2544 MatGetGhosts_MPIBAIJ, 2545 NULL, 2546 NULL, 2547 /*119*/ NULL, 2548 NULL, 2549 NULL, 2550 NULL, 2551 MatGetMultiProcBlock_MPIBAIJ, 2552 /*124*/ NULL, 2553 MatGetColumnReductions_MPIBAIJ, 2554 MatInvertBlockDiagonal_MPIBAIJ, 2555 NULL, 2556 NULL, 2557 /*129*/ NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 /*134*/ NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*139*/ MatSetBlockSizes_Default, 2568 NULL, 2569 NULL, 2570 MatFDColoringSetUp_MPIXAIJ, 2571 NULL, 2572 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2573 NULL, 2574 NULL, 2575 NULL, 2576 NULL, 2577 NULL, 2578 /*150*/ NULL, 2579 MatEliminateZeros_MPIBAIJ, 2580 MatGetRowSumAbs_MPIBAIJ, 2581 NULL, 2582 NULL, 2583 /*155*/ NULL, 2584 MatCopyHashToXAIJ_MPI_Hash}; 2585 2586 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2587 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2588 2589 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2590 { 2591 PetscInt m, rstart, cstart, cend; 2592 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2593 const PetscInt *JJ = NULL; 2594 PetscScalar *values = NULL; 2595 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2596 PetscBool nooffprocentries; 2597 2598 PetscFunctionBegin; 2599 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2600 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2601 PetscCall(PetscLayoutSetUp(B->rmap)); 2602 PetscCall(PetscLayoutSetUp(B->cmap)); 2603 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2604 m = B->rmap->n / bs; 2605 rstart = B->rmap->rstart / bs; 2606 cstart = B->cmap->rstart / bs; 2607 cend = B->cmap->rend / bs; 2608 2609 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2610 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2611 for (i = 0; i < m; i++) { 2612 nz = ii[i + 1] - ii[i]; 2613 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2614 nz_max = PetscMax(nz_max, nz); 2615 dlen = 0; 2616 olen = 0; 2617 JJ = jj + ii[i]; 2618 for (j = 0; j < nz; j++) { 2619 if (*JJ < cstart || *JJ >= cend) olen++; 2620 else dlen++; 2621 JJ++; 2622 } 2623 d_nnz[i] = dlen; 2624 o_nnz[i] = olen; 2625 } 2626 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2627 PetscCall(PetscFree2(d_nnz, o_nnz)); 2628 2629 values = (PetscScalar *)V; 2630 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2631 for (i = 0; i < m; i++) { 2632 PetscInt row = i + rstart; 2633 PetscInt ncols = ii[i + 1] - ii[i]; 2634 const PetscInt *icols = jj + ii[i]; 2635 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2636 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2638 } else { /* block ordering does not match so we can only insert one block at a time. */ 2639 PetscInt j; 2640 for (j = 0; j < ncols; j++) { 2641 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2642 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2643 } 2644 } 2645 } 2646 2647 if (!V) PetscCall(PetscFree(values)); 2648 nooffprocentries = B->nooffprocentries; 2649 B->nooffprocentries = PETSC_TRUE; 2650 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2651 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2652 B->nooffprocentries = nooffprocentries; 2653 2654 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2655 PetscFunctionReturn(PETSC_SUCCESS); 2656 } 2657 2658 /*@C 2659 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2660 2661 Collective 2662 2663 Input Parameters: 2664 + B - the matrix 2665 . bs - the block size 2666 . i - the indices into `j` for the start of each local row (starts with zero) 2667 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2668 - v - optional values in the matrix, use `NULL` if not provided 2669 2670 Level: advanced 2671 2672 Notes: 2673 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2674 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2675 called this routine. 2676 2677 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2678 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2679 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2680 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2681 block column and the second index is over columns within a block. 2682 2683 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2684 2685 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2686 @*/ 2687 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2688 { 2689 PetscFunctionBegin; 2690 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2691 PetscValidType(B, 1); 2692 PetscValidLogicalCollectiveInt(B, bs, 2); 2693 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2698 { 2699 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2700 PetscInt i; 2701 PetscMPIInt size; 2702 2703 PetscFunctionBegin; 2704 if (B->hash_active) { 2705 B->ops[0] = b->cops; 2706 B->hash_active = PETSC_FALSE; 2707 } 2708 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2709 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2710 PetscCall(PetscLayoutSetUp(B->rmap)); 2711 PetscCall(PetscLayoutSetUp(B->cmap)); 2712 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2713 2714 if (d_nnz) { 2715 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2716 } 2717 if (o_nnz) { 2718 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2719 } 2720 2721 b->bs2 = bs * bs; 2722 b->mbs = B->rmap->n / bs; 2723 b->nbs = B->cmap->n / bs; 2724 b->Mbs = B->rmap->N / bs; 2725 b->Nbs = B->cmap->N / bs; 2726 2727 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2728 b->rstartbs = B->rmap->rstart / bs; 2729 b->rendbs = B->rmap->rend / bs; 2730 b->cstartbs = B->cmap->rstart / bs; 2731 b->cendbs = B->cmap->rend / bs; 2732 2733 #if defined(PETSC_USE_CTABLE) 2734 PetscCall(PetscHMapIDestroy(&b->colmap)); 2735 #else 2736 PetscCall(PetscFree(b->colmap)); 2737 #endif 2738 PetscCall(PetscFree(b->garray)); 2739 PetscCall(VecDestroy(&b->lvec)); 2740 PetscCall(VecScatterDestroy(&b->Mvctx)); 2741 2742 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2743 2744 MatSeqXAIJGetOptions_Private(b->B); 2745 PetscCall(MatDestroy(&b->B)); 2746 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2747 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2748 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2749 MatSeqXAIJRestoreOptions_Private(b->B); 2750 2751 MatSeqXAIJGetOptions_Private(b->A); 2752 PetscCall(MatDestroy(&b->A)); 2753 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2754 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2755 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2756 MatSeqXAIJRestoreOptions_Private(b->A); 2757 2758 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2759 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2760 B->preallocated = PETSC_TRUE; 2761 B->was_assembled = PETSC_FALSE; 2762 B->assembled = PETSC_FALSE; 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2767 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2768 2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2770 { 2771 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2772 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2773 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2774 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2775 2776 PetscFunctionBegin; 2777 PetscCall(PetscMalloc1(M + 1, &ii)); 2778 ii[0] = 0; 2779 for (i = 0; i < M; i++) { 2780 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2781 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2782 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2783 /* remove one from count of matrix has diagonal */ 2784 for (j = id[i]; j < id[i + 1]; j++) { 2785 if (jd[j] == i) { 2786 ii[i + 1]--; 2787 break; 2788 } 2789 } 2790 } 2791 PetscCall(PetscMalloc1(ii[M], &jj)); 2792 cnt = 0; 2793 for (i = 0; i < M; i++) { 2794 for (j = io[i]; j < io[i + 1]; j++) { 2795 if (garray[jo[j]] > rstart) break; 2796 jj[cnt++] = garray[jo[j]]; 2797 } 2798 for (k = id[i]; k < id[i + 1]; k++) { 2799 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2800 } 2801 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2802 } 2803 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 } 2806 2807 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2808 2809 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2810 2811 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2812 { 2813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2814 Mat_MPIAIJ *b; 2815 Mat B; 2816 2817 PetscFunctionBegin; 2818 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2819 2820 if (reuse == MAT_REUSE_MATRIX) { 2821 B = *newmat; 2822 } else { 2823 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2824 PetscCall(MatSetType(B, MATMPIAIJ)); 2825 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2826 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2827 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2828 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2829 } 2830 b = (Mat_MPIAIJ *)B->data; 2831 2832 if (reuse == MAT_REUSE_MATRIX) { 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2835 } else { 2836 PetscInt *garray = a->garray; 2837 Mat_SeqAIJ *bB; 2838 PetscInt bs, nnz; 2839 PetscCall(MatDestroy(&b->A)); 2840 PetscCall(MatDestroy(&b->B)); 2841 /* just clear out the data structure */ 2842 PetscCall(MatDisAssemble_MPIAIJ(B)); 2843 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2844 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2845 2846 /* Global numbering for b->B columns */ 2847 bB = (Mat_SeqAIJ *)b->B->data; 2848 bs = A->rmap->bs; 2849 nnz = bB->i[A->rmap->n]; 2850 for (PetscInt k = 0; k < nnz; k++) { 2851 PetscInt bj = bB->j[k] / bs; 2852 PetscInt br = bB->j[k] % bs; 2853 bB->j[k] = garray[bj] * bs + br; 2854 } 2855 } 2856 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2857 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2858 2859 if (reuse == MAT_INPLACE_MATRIX) { 2860 PetscCall(MatHeaderReplace(A, &B)); 2861 } else { 2862 *newmat = B; 2863 } 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 /*MC 2868 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2869 2870 Options Database Keys: 2871 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2872 . -mat_block_size <bs> - set the blocksize used to store the matrix 2873 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2874 - -mat_use_hash_table <fact> - set hash table factor 2875 2876 Level: beginner 2877 2878 Note: 2879 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2880 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2881 2882 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2883 M*/ 2884 2885 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2886 2887 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2888 { 2889 Mat_MPIBAIJ *b; 2890 PetscBool flg = PETSC_FALSE; 2891 2892 PetscFunctionBegin; 2893 PetscCall(PetscNew(&b)); 2894 B->data = (void *)b; 2895 B->ops[0] = MatOps_Values; 2896 B->assembled = PETSC_FALSE; 2897 2898 B->insertmode = NOT_SET_VALUES; 2899 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2900 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2901 2902 /* build local table of row and column ownerships */ 2903 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2904 2905 /* build cache for off array entries formed */ 2906 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2907 2908 b->donotstash = PETSC_FALSE; 2909 b->colmap = NULL; 2910 b->garray = NULL; 2911 b->roworiented = PETSC_TRUE; 2912 2913 /* stuff used in block assembly */ 2914 b->barray = NULL; 2915 2916 /* stuff used for matrix vector multiply */ 2917 b->lvec = NULL; 2918 b->Mvctx = NULL; 2919 2920 /* stuff for MatGetRow() */ 2921 b->rowindices = NULL; 2922 b->rowvalues = NULL; 2923 b->getrowactive = PETSC_FALSE; 2924 2925 /* hash table stuff */ 2926 b->ht = NULL; 2927 b->hd = NULL; 2928 b->ht_size = 0; 2929 b->ht_flag = PETSC_FALSE; 2930 b->ht_fact = 0; 2931 b->ht_total_ct = 0; 2932 b->ht_insert_ct = 0; 2933 2934 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2935 b->ijonly = PETSC_FALSE; 2936 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2940 #if defined(PETSC_HAVE_HYPRE) 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2942 #endif 2943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2947 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2949 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2950 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2951 2952 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2953 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2954 if (flg) { 2955 PetscReal fact = 1.39; 2956 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2957 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2958 if (fact <= 1.0) fact = 1.39; 2959 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2960 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2961 } 2962 PetscOptionsEnd(); 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2967 /*MC 2968 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2969 2970 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2971 and `MATMPIBAIJ` otherwise. 2972 2973 Options Database Keys: 2974 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2975 2976 Level: beginner 2977 2978 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2979 M*/ 2980 2981 /*@ 2982 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2983 (block compressed row). 2984 2985 Collective 2986 2987 Input Parameters: 2988 + B - the matrix 2989 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2990 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2991 . d_nz - number of block nonzeros per block row in diagonal portion of local 2992 submatrix (same for all local rows) 2993 . d_nnz - array containing the number of block nonzeros in the various block rows 2994 of the in diagonal portion of the local (possibly different for each block 2995 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2996 set it even if it is zero. 2997 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2998 submatrix (same for all local rows). 2999 - o_nnz - array containing the number of nonzeros in the various block rows of the 3000 off-diagonal portion of the local submatrix (possibly different for 3001 each block row) or `NULL`. 3002 3003 If the *_nnz parameter is given then the *_nz parameter is ignored 3004 3005 Options Database Keys: 3006 + -mat_block_size - size of the blocks to use 3007 - -mat_use_hash_table <fact> - set hash table factor 3008 3009 Level: intermediate 3010 3011 Notes: 3012 For good matrix assembly performance 3013 the user should preallocate the matrix storage by setting the parameters 3014 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3015 performance can be increased by more than a factor of 50. 3016 3017 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3018 than it must be used on all processors that share the object for that argument. 3019 3020 Storage Information: 3021 For a square global matrix we define each processor's diagonal portion 3022 to be its local rows and the corresponding columns (a square submatrix); 3023 each processor's off-diagonal portion encompasses the remainder of the 3024 local matrix (a rectangular submatrix). 3025 3026 The user can specify preallocated storage for the diagonal part of 3027 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3028 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3029 memory allocation. Likewise, specify preallocated storage for the 3030 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3031 3032 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3033 the figure below we depict these three local rows and all columns (0-11). 3034 3035 .vb 3036 0 1 2 3 4 5 6 7 8 9 10 11 3037 -------------------------- 3038 row 3 |o o o d d d o o o o o o 3039 row 4 |o o o d d d o o o o o o 3040 row 5 |o o o d d d o o o o o o 3041 -------------------------- 3042 .ve 3043 3044 Thus, any entries in the d locations are stored in the d (diagonal) 3045 submatrix, and any entries in the o locations are stored in the 3046 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3047 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3048 3049 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3050 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3051 In general, for PDE problems in which most nonzeros are near the diagonal, 3052 one expects `d_nz` >> `o_nz`. 3053 3054 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3055 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3056 You can also run with the option `-info` and look for messages with the string 3057 malloc in them to see if additional memory allocation was needed. 3058 3059 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3060 @*/ 3061 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3062 { 3063 PetscFunctionBegin; 3064 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3065 PetscValidType(B, 1); 3066 PetscValidLogicalCollectiveInt(B, bs, 2); 3067 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3068 PetscFunctionReturn(PETSC_SUCCESS); 3069 } 3070 3071 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3072 /*@ 3073 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3074 (block compressed row). 3075 3076 Collective 3077 3078 Input Parameters: 3079 + comm - MPI communicator 3080 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3081 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3082 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3083 This value should be the same as the local size used in creating the 3084 y vector for the matrix-vector product y = Ax. 3085 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3086 This value should be the same as the local size used in creating the 3087 x vector for the matrix-vector product y = Ax. 3088 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3089 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3090 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3091 submatrix (same for all local rows) 3092 . d_nnz - array containing the number of nonzero blocks in the various block rows 3093 of the in diagonal portion of the local (possibly different for each block 3094 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3095 and set it even if it is zero. 3096 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3097 submatrix (same for all local rows). 3098 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3099 off-diagonal portion of the local submatrix (possibly different for 3100 each block row) or NULL. 3101 3102 Output Parameter: 3103 . A - the matrix 3104 3105 Options Database Keys: 3106 + -mat_block_size - size of the blocks to use 3107 - -mat_use_hash_table <fact> - set hash table factor 3108 3109 Level: intermediate 3110 3111 Notes: 3112 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3113 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3114 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3115 3116 For good matrix assembly performance 3117 the user should preallocate the matrix storage by setting the parameters 3118 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3119 performance can be increased by more than a factor of 50. 3120 3121 If the *_nnz parameter is given then the *_nz parameter is ignored 3122 3123 A nonzero block is any block that as 1 or more nonzeros in it 3124 3125 The user MUST specify either the local or global matrix dimensions 3126 (possibly both). 3127 3128 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3129 than it must be used on all processors that share the object for that argument. 3130 3131 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3132 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3133 3134 Storage Information: 3135 For a square global matrix we define each processor's diagonal portion 3136 to be its local rows and the corresponding columns (a square submatrix); 3137 each processor's off-diagonal portion encompasses the remainder of the 3138 local matrix (a rectangular submatrix). 3139 3140 The user can specify preallocated storage for the diagonal part of 3141 the local submatrix with either d_nz or d_nnz (not both). Set 3142 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3143 memory allocation. Likewise, specify preallocated storage for the 3144 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3145 3146 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3147 the figure below we depict these three local rows and all columns (0-11). 3148 3149 .vb 3150 0 1 2 3 4 5 6 7 8 9 10 11 3151 -------------------------- 3152 row 3 |o o o d d d o o o o o o 3153 row 4 |o o o d d d o o o o o o 3154 row 5 |o o o d d d o o o o o o 3155 -------------------------- 3156 .ve 3157 3158 Thus, any entries in the d locations are stored in the d (diagonal) 3159 submatrix, and any entries in the o locations are stored in the 3160 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3161 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3162 3163 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3164 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3165 In general, for PDE problems in which most nonzeros are near the diagonal, 3166 one expects `d_nz` >> `o_nz`. 3167 3168 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3169 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3170 @*/ 3171 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3172 { 3173 PetscMPIInt size; 3174 3175 PetscFunctionBegin; 3176 PetscCall(MatCreate(comm, A)); 3177 PetscCall(MatSetSizes(*A, m, n, M, N)); 3178 PetscCallMPI(MPI_Comm_size(comm, &size)); 3179 if (size > 1) { 3180 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3181 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3182 } else { 3183 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3184 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3185 } 3186 PetscFunctionReturn(PETSC_SUCCESS); 3187 } 3188 3189 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3190 { 3191 Mat mat; 3192 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3193 PetscInt len = 0; 3194 3195 PetscFunctionBegin; 3196 *newmat = NULL; 3197 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3198 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3199 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3200 3201 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3202 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3203 if (matin->hash_active) { 3204 PetscCall(MatSetUp(mat)); 3205 } else { 3206 mat->factortype = matin->factortype; 3207 mat->preallocated = PETSC_TRUE; 3208 mat->assembled = PETSC_TRUE; 3209 mat->insertmode = NOT_SET_VALUES; 3210 3211 a = (Mat_MPIBAIJ *)mat->data; 3212 mat->rmap->bs = matin->rmap->bs; 3213 a->bs2 = oldmat->bs2; 3214 a->mbs = oldmat->mbs; 3215 a->nbs = oldmat->nbs; 3216 a->Mbs = oldmat->Mbs; 3217 a->Nbs = oldmat->Nbs; 3218 3219 a->size = oldmat->size; 3220 a->rank = oldmat->rank; 3221 a->donotstash = oldmat->donotstash; 3222 a->roworiented = oldmat->roworiented; 3223 a->rowindices = NULL; 3224 a->rowvalues = NULL; 3225 a->getrowactive = PETSC_FALSE; 3226 a->barray = NULL; 3227 a->rstartbs = oldmat->rstartbs; 3228 a->rendbs = oldmat->rendbs; 3229 a->cstartbs = oldmat->cstartbs; 3230 a->cendbs = oldmat->cendbs; 3231 3232 /* hash table stuff */ 3233 a->ht = NULL; 3234 a->hd = NULL; 3235 a->ht_size = 0; 3236 a->ht_flag = oldmat->ht_flag; 3237 a->ht_fact = oldmat->ht_fact; 3238 a->ht_total_ct = 0; 3239 a->ht_insert_ct = 0; 3240 3241 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3242 if (oldmat->colmap) { 3243 #if defined(PETSC_USE_CTABLE) 3244 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3245 #else 3246 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3247 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3248 #endif 3249 } else a->colmap = NULL; 3250 3251 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3252 PetscCall(PetscMalloc1(len, &a->garray)); 3253 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3254 } else a->garray = NULL; 3255 3256 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3257 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3258 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3259 3260 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3261 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3262 } 3263 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3264 *newmat = mat; 3265 PetscFunctionReturn(PETSC_SUCCESS); 3266 } 3267 3268 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3269 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3270 { 3271 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3272 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3273 PetscScalar *matvals; 3274 3275 PetscFunctionBegin; 3276 PetscCall(PetscViewerSetUp(viewer)); 3277 3278 /* read in matrix header */ 3279 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3280 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3281 M = header[1]; 3282 N = header[2]; 3283 nz = header[3]; 3284 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3285 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3286 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3287 3288 /* set block sizes from the viewer's .info file */ 3289 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3290 /* set local sizes if not set already */ 3291 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3292 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3293 /* set global sizes if not set already */ 3294 if (mat->rmap->N < 0) mat->rmap->N = M; 3295 if (mat->cmap->N < 0) mat->cmap->N = N; 3296 PetscCall(PetscLayoutSetUp(mat->rmap)); 3297 PetscCall(PetscLayoutSetUp(mat->cmap)); 3298 3299 /* check if the matrix sizes are correct */ 3300 PetscCall(MatGetSize(mat, &rows, &cols)); 3301 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3302 PetscCall(MatGetBlockSize(mat, &bs)); 3303 PetscCall(MatGetLocalSize(mat, &m, &n)); 3304 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3305 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3306 mbs = m / bs; 3307 nbs = n / bs; 3308 3309 /* read in row lengths and build row indices */ 3310 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3311 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3312 rowidxs[0] = 0; 3313 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3314 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3315 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3316 3317 /* read in column indices and matrix values */ 3318 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3319 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3320 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3321 3322 { /* preallocate matrix storage */ 3323 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3324 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3325 PetscBool sbaij, done; 3326 PetscInt *d_nnz, *o_nnz; 3327 3328 PetscCall(PetscBTCreate(nbs, &bt)); 3329 PetscCall(PetscHSetICreate(&ht)); 3330 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3331 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3332 for (i = 0; i < mbs; i++) { 3333 PetscCall(PetscBTMemzero(nbs, bt)); 3334 PetscCall(PetscHSetIClear(ht)); 3335 for (k = 0; k < bs; k++) { 3336 PetscInt row = bs * i + k; 3337 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3338 PetscInt col = colidxs[j]; 3339 if (!sbaij || col >= row) { 3340 if (col >= cs && col < ce) { 3341 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3342 } else { 3343 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3344 if (done) o_nnz[i]++; 3345 } 3346 } 3347 } 3348 } 3349 } 3350 PetscCall(PetscBTDestroy(&bt)); 3351 PetscCall(PetscHSetIDestroy(&ht)); 3352 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3353 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3354 PetscCall(PetscFree2(d_nnz, o_nnz)); 3355 } 3356 3357 /* store matrix values */ 3358 for (i = 0; i < m; i++) { 3359 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3360 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3361 } 3362 3363 PetscCall(PetscFree(rowidxs)); 3364 PetscCall(PetscFree2(colidxs, matvals)); 3365 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3366 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3367 PetscFunctionReturn(PETSC_SUCCESS); 3368 } 3369 3370 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3371 { 3372 PetscBool isbinary; 3373 3374 PetscFunctionBegin; 3375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3376 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3377 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3378 PetscFunctionReturn(PETSC_SUCCESS); 3379 } 3380 3381 /*@ 3382 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3383 3384 Input Parameters: 3385 + mat - the matrix 3386 - fact - factor 3387 3388 Options Database Key: 3389 . -mat_use_hash_table <fact> - provide the factor 3390 3391 Level: advanced 3392 3393 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3394 @*/ 3395 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3396 { 3397 PetscFunctionBegin; 3398 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } 3401 3402 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3403 { 3404 Mat_MPIBAIJ *baij; 3405 3406 PetscFunctionBegin; 3407 baij = (Mat_MPIBAIJ *)mat->data; 3408 baij->ht_fact = fact; 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 3412 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3413 { 3414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3415 PetscBool flg; 3416 3417 PetscFunctionBegin; 3418 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3419 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3420 if (Ad) *Ad = a->A; 3421 if (Ao) *Ao = a->B; 3422 if (colmap) *colmap = a->garray; 3423 PetscFunctionReturn(PETSC_SUCCESS); 3424 } 3425 3426 /* 3427 Special version for direct calls from Fortran (to eliminate two function call overheads 3428 */ 3429 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3430 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3431 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3432 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3433 #endif 3434 3435 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3436 /*@C 3437 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3438 3439 Collective 3440 3441 Input Parameters: 3442 + matin - the matrix 3443 . min - number of input rows 3444 . im - input rows 3445 . nin - number of input columns 3446 . in - input columns 3447 . v - numerical values input 3448 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3449 3450 Level: advanced 3451 3452 Developer Notes: 3453 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3454 3455 .seealso: `Mat`, `MatSetValuesBlocked()` 3456 @*/ 3457 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3458 { 3459 /* convert input arguments to C version */ 3460 Mat mat = *matin; 3461 PetscInt m = *min, n = *nin; 3462 InsertMode addv = *addvin; 3463 3464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3465 const MatScalar *value; 3466 MatScalar *barray = baij->barray; 3467 PetscBool roworiented = baij->roworiented; 3468 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3469 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3470 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3471 3472 PetscFunctionBegin; 3473 /* tasks normally handled by MatSetValuesBlocked() */ 3474 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3475 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3476 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3477 if (mat->assembled) { 3478 mat->was_assembled = PETSC_TRUE; 3479 mat->assembled = PETSC_FALSE; 3480 } 3481 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3482 3483 if (!barray) { 3484 PetscCall(PetscMalloc1(bs2, &barray)); 3485 baij->barray = barray; 3486 } 3487 3488 if (roworiented) stepval = (n - 1) * bs; 3489 else stepval = (m - 1) * bs; 3490 3491 for (i = 0; i < m; i++) { 3492 if (im[i] < 0) continue; 3493 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3494 if (im[i] >= rstart && im[i] < rend) { 3495 row = im[i] - rstart; 3496 for (j = 0; j < n; j++) { 3497 /* If NumCol = 1 then a copy is not required */ 3498 if ((roworiented) && (n == 1)) { 3499 barray = (MatScalar *)v + i * bs2; 3500 } else if ((!roworiented) && (m == 1)) { 3501 barray = (MatScalar *)v + j * bs2; 3502 } else { /* Here a copy is required */ 3503 if (roworiented) { 3504 value = v + i * (stepval + bs) * bs + j * bs; 3505 } else { 3506 value = v + j * (stepval + bs) * bs + i * bs; 3507 } 3508 for (ii = 0; ii < bs; ii++, value += stepval) { 3509 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3510 } 3511 barray -= bs2; 3512 } 3513 3514 if (in[j] >= cstart && in[j] < cend) { 3515 col = in[j] - cstart; 3516 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3517 } else if (in[j] < 0) { 3518 continue; 3519 } else { 3520 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3521 if (mat->was_assembled) { 3522 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3523 3524 #if defined(PETSC_USE_DEBUG) 3525 #if defined(PETSC_USE_CTABLE) 3526 { 3527 PetscInt data; 3528 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3529 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3530 } 3531 #else 3532 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3533 #endif 3534 #endif 3535 #if defined(PETSC_USE_CTABLE) 3536 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3537 col = (col - 1) / bs; 3538 #else 3539 col = (baij->colmap[in[j]] - 1) / bs; 3540 #endif 3541 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3542 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3543 col = in[j]; 3544 } 3545 } else col = in[j]; 3546 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3547 } 3548 } 3549 } else { 3550 if (!baij->donotstash) { 3551 if (roworiented) { 3552 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3553 } else { 3554 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3555 } 3556 } 3557 } 3558 } 3559 3560 /* task normally handled by MatSetValuesBlocked() */ 3561 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3562 PetscFunctionReturn(PETSC_SUCCESS); 3563 } 3564 3565 /*@ 3566 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3567 3568 Collective 3569 3570 Input Parameters: 3571 + comm - MPI communicator 3572 . bs - the block size, only a block size of 1 is supported 3573 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3574 . n - This value should be the same as the local size used in creating the 3575 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3576 calculated if `N` is given) For square matrices `n` is almost always `m`. 3577 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3578 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3579 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3580 . j - column indices 3581 - a - matrix values 3582 3583 Output Parameter: 3584 . mat - the matrix 3585 3586 Level: intermediate 3587 3588 Notes: 3589 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3590 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3591 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3592 3593 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3594 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3595 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3596 with column-major ordering within blocks. 3597 3598 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3599 3600 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3601 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3602 @*/ 3603 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3604 { 3605 PetscFunctionBegin; 3606 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3607 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3608 PetscCall(MatCreate(comm, mat)); 3609 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3610 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3611 PetscCall(MatSetBlockSize(*mat, bs)); 3612 PetscCall(MatSetUp(*mat)); 3613 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3614 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3615 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3616 PetscFunctionReturn(PETSC_SUCCESS); 3617 } 3618 3619 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3620 { 3621 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3622 PetscInt *indx; 3623 PetscScalar *values; 3624 3625 PetscFunctionBegin; 3626 PetscCall(MatGetSize(inmat, &m, &N)); 3627 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3629 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3630 PetscInt *bindx, rmax = a->rmax, j; 3631 PetscMPIInt rank, size; 3632 3633 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3634 mbs = m / bs; 3635 Nbs = N / cbs; 3636 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3637 nbs = n / cbs; 3638 3639 PetscCall(PetscMalloc1(rmax, &bindx)); 3640 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3641 3642 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3643 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3644 if (rank == size - 1) { 3645 /* Check sum(nbs) = Nbs */ 3646 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3647 } 3648 3649 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3650 for (i = 0; i < mbs; i++) { 3651 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3652 nnz = nnz / bs; 3653 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3654 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3655 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3656 } 3657 PetscCall(PetscFree(bindx)); 3658 3659 PetscCall(MatCreate(comm, outmat)); 3660 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3661 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3662 PetscCall(MatSetType(*outmat, MATBAIJ)); 3663 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3664 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3665 MatPreallocateEnd(dnz, onz); 3666 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3667 } 3668 3669 /* numeric phase */ 3670 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3671 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3672 3673 for (i = 0; i < m; i++) { 3674 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3675 Ii = i + rstart; 3676 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3677 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3678 } 3679 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3680 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3681 PetscFunctionReturn(PETSC_SUCCESS); 3682 } 3683