#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar *vv;
  Vec vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
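      /* tie case: when the off-diagonal part attains the same magnitude, report the smaller global column index */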
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
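
   As an illustration only (a hypothetical layout, not taken from any particular
   run): with bs = 2, if this process couples to global block columns 3 and 7,
   then garray = {3, 7} and the map records

     colmap[3] = 0*bs + 1 = 1,   colmap[7] = 1*bs + 1 = 3,

   so a global point column c in the off-diagonal part is translated by

     lcol = colmap[c/bs] - 1 + (c % bs);   (a negative value means "block not present")

   With PETSC_USE_CTABLE the same data is kept in a PetscHMapI keyed by
   garray[i] + 1 instead of a dense array of length Nbs + 1.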
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar value;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar *aa = a->a;

  Mat B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar *ba = b->a;

  PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
  PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp = aj + ai[row];
  ap = aa + bs2 * ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low = 0;
  high = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar *barray = baij->barray;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, row, col;
  PetscInt rstart_orig = mat->rmap->rstart;
  PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal tmp;
  MatScalar **HD = baij->hd, value;
  PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1 = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscBool roworiented = baij->roworiented;
  PetscInt i, j, ii, jj, row, col;
  PetscInt rstart = baij->rstartbs;
  PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal tmp;
  MatScalar **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1 = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal sum = 0.0;
  MatScalar *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscMPIInt iN;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(PetscMPIIntCast(mat->cmap->N, &iN));
      PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates the hash table and fills in its entries.
   This table is created only once.
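
   A sketch of the lookup scheme (summarizing the code below and
   MatSetValues_MPIBAIJ_HT(); this is internal, not a public interface):
   each nonzero block with block row brow and block column bcol is entered under

     key = brow*Nbs + bcol + 1;        (the +1 lets 0 mean "empty slot")
     h1  = HASH(ht_size, key, tmp);    (fractional part of key*0.618... times ht_size)

   Collisions are resolved by linear probing upward from h1 (wrapping around the
   table), and HD[slot] keeps the address of that block's bs*bs values, so a
   scalar entry (row, col) lands at HD[slot][(col%bs)*bs + (row%bs)].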
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt *HT, key;
  MatScalar **HD;
  PetscReal tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1 = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1 = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt *row, *col;
  PetscBool r1, r2, r3, other_disassembled;
  MatScalar *val;
  PetscMPIInt n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the row-oriented flags before calling MatSetValuesBlocked() and
       restore the original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
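     (mat->was_assembled is combined with MPI_LAND below: other_disassembled is
     PETSC_TRUE only if every rank is still assembled, so any rank that has
     disassembled forces the remaining assembled ranks to disassemble as well.)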
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt rank = baij->rank;
  PetscInt bs = mat->rmap->bs;
  PetscBool iascii, isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
"Information on VecScatter used in matrix-vector product: \n")); 1038 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1041 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1044 PetscFunctionReturn(PETSC_SUCCESS); 1045 } 1046 } 1047 1048 if (isdraw) { 1049 PetscDraw draw; 1050 PetscBool isnull; 1051 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1052 PetscCall(PetscDrawIsNull(draw, &isnull)); 1053 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 { 1057 /* assemble the entire matrix onto first processor. */ 1058 Mat A; 1059 Mat_SeqBAIJ *Aloc; 1060 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1061 MatScalar *a; 1062 const char *matname; 1063 1064 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1065 /* Perhaps this should be the type of mat? */ 1066 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1067 if (rank == 0) { 1068 PetscCall(MatSetSizes(A, M, N, M, N)); 1069 } else { 1070 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1071 } 1072 PetscCall(MatSetType(A, MATMPIBAIJ)); 1073 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1074 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1075 1076 /* copy over the A part */ 1077 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1078 ai = Aloc->i; 1079 aj = Aloc->j; 1080 a = Aloc->a; 1081 PetscCall(PetscMalloc1(bs, &rvals)); 1082 1083 for (i = 0; i < mbs; i++) { 1084 rvals[0] = bs * (baij->rstartbs + i); 1085 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1086 for (j = ai[i]; j < ai[i + 1]; j++) { 1087 col = (baij->cstartbs + aj[j]) * bs; 1088 for (k = 0; k < bs; k++) { 1089 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1090 col++; 1091 a += bs; 1092 } 1093 } 1094 } 1095 /* copy over the B part */ 1096 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1097 ai = Aloc->i; 1098 aj = Aloc->j; 1099 a = Aloc->a; 1100 for (i = 0; i < mbs; i++) { 1101 rvals[0] = bs * (baij->rstartbs + i); 1102 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1103 for (j = ai[i]; j < ai[i + 1]; j++) { 1104 col = baij->garray[aj[j]] * bs; 1105 for (k = 0; k < bs; k++) { 1106 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1107 col++; 1108 a += bs; 1109 } 1110 } 1111 } 1112 PetscCall(PetscFree(rvals)); 1113 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1114 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1115 /* 1116 Everyone has to call to draw the matrix since the graphics waits are 1117 synchronized across all processors that share the PetscDraw object 1118 */ 1119 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1120 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1121 if (rank == 0) { 1122 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1123 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1124 } 1125 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1126 PetscCall(MatDestroy(&A)); 1127 } 1128 PetscFunctionReturn(PETSC_SUCCESS); 1129 } 1130 1131 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1132 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1133 { 1134 
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount nz, hnz;
  PetscInt *rowlens, *colidxs;
  PetscScalar *matvals;
  PetscMPIInt rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
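  /* scale both the on-process diagonal block A and the off-diagonal block B; no communication is needed */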
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data;
  Mat A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded = isend[2];
    info->memory = isend[3];
    info->mallocs = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded = irecv[2];
    info->memory = irecv[3];
    info->mallocs = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
  case MAT_SUBMAT_SINGLEIS:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
PETSC_ERR_SUP, "unknown option %d", op); 1509 } 1510 PetscFunctionReturn(PETSC_SUCCESS); 1511 } 1512 1513 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1514 { 1515 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1516 Mat_SeqBAIJ *Aloc; 1517 Mat B; 1518 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1519 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1520 MatScalar *a; 1521 1522 PetscFunctionBegin; 1523 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1524 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1525 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1526 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1527 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1528 /* Do not know preallocation information, but must set block size */ 1529 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1530 } else { 1531 B = *matout; 1532 } 1533 1534 /* copy over the A part */ 1535 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1536 ai = Aloc->i; 1537 aj = Aloc->j; 1538 a = Aloc->a; 1539 PetscCall(PetscMalloc1(bs, &rvals)); 1540 1541 for (i = 0; i < mbs; i++) { 1542 rvals[0] = bs * (baij->rstartbs + i); 1543 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1544 for (j = ai[i]; j < ai[i + 1]; j++) { 1545 col = (baij->cstartbs + aj[j]) * bs; 1546 for (k = 0; k < bs; k++) { 1547 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1548 1549 col++; 1550 a += bs; 1551 } 1552 } 1553 } 1554 /* copy over the B part */ 1555 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1556 ai = Aloc->i; 1557 aj = Aloc->j; 1558 a = Aloc->a; 1559 for (i = 0; i < mbs; i++) { 1560 rvals[0] = bs * (baij->rstartbs + i); 1561 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1562 for (j = ai[i]; j < ai[i + 1]; j++) { 1563 col = baij->garray[aj[j]] * bs; 1564 for (k = 0; k < bs; k++) { 1565 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1566 col++; 1567 a += bs; 1568 } 1569 } 1570 } 1571 PetscCall(PetscFree(rvals)); 1572 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1574 1575 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1576 else PetscCall(MatHeaderMerge(A, &B)); 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1581 { 1582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1583 Mat a = baij->A, b = baij->B; 1584 PetscInt s1, s2, s3; 1585 1586 PetscFunctionBegin; 1587 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1588 if (rr) { 1589 PetscCall(VecGetLocalSize(rr, &s1)); 1590 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1591 /* Overlap communication with computation. 
*/ 1592 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1593 } 1594 if (ll) { 1595 PetscCall(VecGetLocalSize(ll, &s1)); 1596 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1597 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1598 } 1599 /* scale the diagonal block */ 1600 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1601 1602 if (rr) { 1603 /* Do a scatter end and then right scale the off-diagonal block */ 1604 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1605 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1606 } 1607 PetscFunctionReturn(PETSC_SUCCESS); 1608 } 1609 1610 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1611 { 1612 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1613 PetscInt *lrows; 1614 PetscInt r, len; 1615 PetscBool cong; 1616 1617 PetscFunctionBegin; 1618 /* get locally owned rows */ 1619 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1620 /* fix right-hand side if needed */ 1621 if (x && b) { 1622 const PetscScalar *xx; 1623 PetscScalar *bb; 1624 1625 PetscCall(VecGetArrayRead(x, &xx)); 1626 PetscCall(VecGetArray(b, &bb)); 1627 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1628 PetscCall(VecRestoreArrayRead(x, &xx)); 1629 PetscCall(VecRestoreArray(b, &bb)); 1630 } 1631 1632 /* actually zap the local rows */ 1633 /* 1634 Zero the required rows. If the "diagonal block" of the matrix 1635 is square and the user wishes to set the diagonal we use separate 1636 code so that MatSetValues() is not called for each diagonal allocating 1637 new memory, thus calling lots of mallocs and slowing things down. 
1638 1639 */ 1640 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1641 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1642 PetscCall(MatHasCongruentLayouts(A, &cong)); 1643 if ((diag != 0.0) && cong) { 1644 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1645 } else if (diag != 0.0) { 1646 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1647 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1648 for (r = 0; r < len; ++r) { 1649 const PetscInt row = lrows[r] + A->rmap->rstart; 1650 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1651 } 1652 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1653 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1654 } else { 1655 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1656 } 1657 PetscCall(PetscFree(lrows)); 1658 1659 /* only change matrix nonzero state if pattern was allowed to be changed */ 1660 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1661 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1662 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1663 } 1664 PetscFunctionReturn(PETSC_SUCCESS); 1665 } 1666 1667 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1668 { 1669 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1670 PetscMPIInt n, p = 0; 1671 PetscInt i, j, k, r, len = 0, row, col, count; 1672 PetscInt *lrows, *owners = A->rmap->range; 1673 PetscSFNode *rrows; 1674 PetscSF sf; 1675 const PetscScalar *xx; 1676 PetscScalar *bb, *mask; 1677 Vec xmask, lmask; 1678 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1679 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1680 PetscScalar *aa; 1681 1682 PetscFunctionBegin; 1683 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1684 /* Create SF where leaves are input rows and roots are owned rows */ 1685 PetscCall(PetscMalloc1(n, &lrows)); 1686 for (r = 0; r < n; ++r) lrows[r] = -1; 1687 PetscCall(PetscMalloc1(N, &rrows)); 1688 for (r = 0; r < N; ++r) { 1689 const PetscInt idx = rows[r]; 1690 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1691 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1692 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1693 } 1694 rrows[r].rank = p; 1695 rrows[r].index = rows[r] - owners[p]; 1696 } 1697 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1698 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1699 /* Collect flags for rows to be zeroed */ 1700 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1701 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1702 PetscCall(PetscSFDestroy(&sf)); 1703 /* Compress and put in row numbers */ 1704 for (r = 0; r < n; ++r) 1705 if (lrows[r] >= 0) lrows[len++] = r; 1706 /* zero diagonal part of matrix */ 1707 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1708 /* handle off-diagonal part of matrix */ 1709 
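/* Off-diagonal part: build a 0/1 mask over the locally owned entries being eliminated (xmask), scatter it into the ghost ordering used by B (lmask), then (a) zero the rows of B that correspond to eliminated local rows and (b) sweep every stored entry of B, dropping those whose ghost column is flagged in lmask and, when x is provided, folding the known solution values into the right-hand side b */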
PetscCall(MatCreateVecs(A, &xmask, NULL)); 1710 PetscCall(VecDuplicate(l->lvec, &lmask)); 1711 PetscCall(VecGetArray(xmask, &bb)); 1712 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1713 PetscCall(VecRestoreArray(xmask, &bb)); 1714 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1716 PetscCall(VecDestroy(&xmask)); 1717 if (x) { 1718 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1719 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1720 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1721 PetscCall(VecGetArray(b, &bb)); 1722 } 1723 PetscCall(VecGetArray(lmask, &mask)); 1724 /* remove zeroed rows of off-diagonal matrix */ 1725 for (i = 0; i < len; ++i) { 1726 row = lrows[i]; 1727 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1728 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1729 for (k = 0; k < count; ++k) { 1730 aa[0] = 0.0; 1731 aa += bs; 1732 } 1733 } 1734 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1735 for (i = 0; i < l->B->rmap->N; ++i) { 1736 row = i / bs; 1737 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1738 for (k = 0; k < bs; ++k) { 1739 col = bs * baij->j[j] + k; 1740 if (PetscAbsScalar(mask[col])) { 1741 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1742 if (x) bb[i] -= aa[0] * xx[col]; 1743 aa[0] = 0.0; 1744 } 1745 } 1746 } 1747 } 1748 if (x) { 1749 PetscCall(VecRestoreArray(b, &bb)); 1750 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1751 } 1752 PetscCall(VecRestoreArray(lmask, &mask)); 1753 PetscCall(VecDestroy(&lmask)); 1754 PetscCall(PetscFree(lrows)); 1755 1756 /* only change matrix nonzero state if pattern was allowed to be changed */ 1757 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1758 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1759 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1760 } 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1765 { 1766 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1767 1768 PetscFunctionBegin; 1769 PetscCall(MatSetUnfactored(a->A)); 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1774 1775 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1776 { 1777 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1778 Mat a, b, c, d; 1779 PetscBool flg; 1780 1781 PetscFunctionBegin; 1782 a = matA->A; 1783 b = matA->B; 1784 c = matB->A; 1785 d = matB->B; 1786 1787 PetscCall(MatEqual(a, c, &flg)); 1788 if (flg) PetscCall(MatEqual(b, d, &flg)); 1789 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1794 { 1795 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1796 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 PetscCall(MatCopy_Basic(A, B, str)); 1802 } else { 1803 PetscCall(MatCopy(a->A, b->A, str)); 1804 PetscCall(MatCopy(a->B, b->B, str)); 1805 } 1806 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1807 PetscFunctionReturn(PETSC_SUCCESS); 1808 } 1809 1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1811 { 1812 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1813 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1814 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1815 1816 PetscFunctionBegin; 1817 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 1821 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1822 { 1823 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1824 PetscBLASInt bnz, one = 1; 1825 Mat_SeqBAIJ *x, *y; 1826 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1827 1828 PetscFunctionBegin; 1829 if (str == SAME_NONZERO_PATTERN) { 1830 PetscScalar alpha = a; 1831 x = (Mat_SeqBAIJ *)xx->A->data; 1832 y = (Mat_SeqBAIJ *)yy->A->data; 1833 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1834 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1835 x = (Mat_SeqBAIJ *)xx->B->data; 1836 y = (Mat_SeqBAIJ *)yy->B->data; 1837 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1838 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1839 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1840 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1841 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1842 } else { 1843 Mat B; 1844 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1845 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1846 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1847 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1848 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1849 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1850 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1851 PetscCall(MatSetType(B, MATMPIBAIJ)); 1852 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1853 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1854 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1855 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1856 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1857 PetscCall(MatHeaderMerge(Y, &B)); 1858 PetscCall(PetscFree(nnz_d)); 1859 PetscCall(PetscFree(nnz_o)); 1860 } 1861 PetscFunctionReturn(PETSC_SUCCESS); 1862 } 1863 1864 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1865 { 1866 PetscFunctionBegin; 1867 if (PetscDefined(USE_COMPLEX)) { 1868 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1869 1870 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1871 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1872 } 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1877 { 1878 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1879 1880 PetscFunctionBegin; 1881 PetscCall(MatRealPart(a->A)); 1882 PetscCall(MatRealPart(a->B)); 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1887 { 1888 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1889 1890 PetscFunctionBegin; 1891 PetscCall(MatImaginaryPart(a->A)); 1892 PetscCall(MatImaginaryPart(a->B)); 1893 PetscFunctionReturn(PETSC_SUCCESS); 1894 } 1895 1896 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1897 { 1898 IS iscol_local; 1899 PetscInt csize; 1900 1901 PetscFunctionBegin; 1902 PetscCall(ISGetLocalSize(iscol, &csize)); 1903 if (call == MAT_REUSE_MATRIX) { 1904 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1905 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1906 } else { 1907 PetscCall(ISAllGather(iscol, &iscol_local)); 1908 } 1909 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1910 if (call == MAT_INITIAL_MATRIX) { 1911 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1912 PetscCall(ISDestroy(&iscol_local)); 1913 } 1914 PetscFunctionReturn(PETSC_SUCCESS); 1915 } 1916 1917 /* 1918 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1919 in local and then by concatenating the local matrices the end result. 1920 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1921 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1922 */ 1923 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1924 { 1925 PetscMPIInt rank, size; 1926 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1927 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1928 Mat M, Mreuse; 1929 MatScalar *vwork, *aa; 1930 MPI_Comm comm; 1931 IS isrow_new, iscol_new; 1932 Mat_SeqBAIJ *aij; 1933 1934 PetscFunctionBegin; 1935 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1936 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1937 PetscCallMPI(MPI_Comm_size(comm, &size)); 1938 /* The compression and expansion should be avoided. 
Doesn't point 1939 out errors, might change the indices, hence buggey */ 1940 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1941 if (isrow == iscol) { 1942 iscol_new = isrow_new; 1943 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1944 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1945 1946 if (call == MAT_REUSE_MATRIX) { 1947 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1948 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1949 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1950 } else { 1951 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1952 } 1953 PetscCall(ISDestroy(&isrow_new)); 1954 PetscCall(ISDestroy(&iscol_new)); 1955 /* 1956 m - number of local rows 1957 n - number of columns (same on all processors) 1958 rstart - first row in new global matrix generated 1959 */ 1960 PetscCall(MatGetBlockSize(mat, &bs)); 1961 PetscCall(MatGetSize(Mreuse, &m, &n)); 1962 m = m / bs; 1963 n = n / bs; 1964 1965 if (call == MAT_INITIAL_MATRIX) { 1966 aij = (Mat_SeqBAIJ *)Mreuse->data; 1967 ii = aij->i; 1968 jj = aij->j; 1969 1970 /* 1971 Determine the number of non-zeros in the diagonal and off-diagonal 1972 portions of the matrix in order to do correct preallocation 1973 */ 1974 1975 /* first get start and end of "diagonal" columns */ 1976 if (csize == PETSC_DECIDE) { 1977 PetscCall(ISGetSize(isrow, &mglobal)); 1978 if (mglobal == n * bs) { /* square matrix */ 1979 nlocal = m; 1980 } else { 1981 nlocal = n / size + ((n % size) > rank); 1982 } 1983 } else { 1984 nlocal = csize / bs; 1985 } 1986 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1987 rstart = rend - nlocal; 1988 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1989 1990 /* next, compute all the lengths */ 1991 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1992 for (i = 0; i < m; i++) { 1993 jend = ii[i + 1] - ii[i]; 1994 olen = 0; 1995 dlen = 0; 1996 for (j = 0; j < jend; j++) { 1997 if (*jj < rstart || *jj >= rend) olen++; 1998 else dlen++; 1999 jj++; 2000 } 2001 olens[i] = olen; 2002 dlens[i] = dlen; 2003 } 2004 PetscCall(MatCreate(comm, &M)); 2005 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2006 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2007 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2008 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2009 PetscCall(PetscFree2(dlens, olens)); 2010 } else { 2011 PetscInt ml, nl; 2012 2013 M = *newmat; 2014 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2015 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2016 PetscCall(MatZeroEntries(M)); 2017 /* 2018 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2019 rather than the slower MatSetValues(). 
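(In this BAIJ variant the routine called directly below is MatSetValuesBlocked_MPIBAIJ(), but the same reasoning applies.)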
2020 */ 2021 M->was_assembled = PETSC_TRUE; 2022 M->assembled = PETSC_FALSE; 2023 } 2024 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2025 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2026 aij = (Mat_SeqBAIJ *)Mreuse->data; 2027 ii = aij->i; 2028 jj = aij->j; 2029 aa = aij->a; 2030 for (i = 0; i < m; i++) { 2031 row = rstart / bs + i; 2032 nz = ii[i + 1] - ii[i]; 2033 cwork = jj; 2034 jj = PetscSafePointerPlusOffset(jj, nz); 2035 vwork = aa; 2036 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2037 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2038 } 2039 2040 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2041 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2042 *newmat = M; 2043 2044 /* save submatrix used in processor for next request */ 2045 if (call == MAT_INITIAL_MATRIX) { 2046 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2047 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2048 } 2049 PetscFunctionReturn(PETSC_SUCCESS); 2050 } 2051 2052 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2053 { 2054 MPI_Comm comm, pcomm; 2055 PetscInt clocal_size, nrows; 2056 const PetscInt *rows; 2057 PetscMPIInt size; 2058 IS crowp, lcolp; 2059 2060 PetscFunctionBegin; 2061 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2062 /* make a collective version of 'rowp' */ 2063 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2064 if (pcomm == comm) { 2065 crowp = rowp; 2066 } else { 2067 PetscCall(ISGetSize(rowp, &nrows)); 2068 PetscCall(ISGetIndices(rowp, &rows)); 2069 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2070 PetscCall(ISRestoreIndices(rowp, &rows)); 2071 } 2072 PetscCall(ISSetPermutation(crowp)); 2073 /* make a local version of 'colp' */ 2074 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2075 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2076 if (size == 1) { 2077 lcolp = colp; 2078 } else { 2079 PetscCall(ISAllGather(colp, &lcolp)); 2080 } 2081 PetscCall(ISSetPermutation(lcolp)); 2082 /* now we just get the submatrix */ 2083 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2084 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2085 /* clean up */ 2086 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2087 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2088 PetscFunctionReturn(PETSC_SUCCESS); 2089 } 2090 2091 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2092 { 2093 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2094 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2095 2096 PetscFunctionBegin; 2097 if (nghosts) *nghosts = B->nbs; 2098 if (ghosts) *ghosts = baij->garray; 2099 PetscFunctionReturn(PETSC_SUCCESS); 2100 } 2101 2102 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2103 { 2104 Mat B; 2105 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2106 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2107 Mat_SeqAIJ *b; 2108 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2109 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2110 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2111 2112 PetscFunctionBegin; 2113 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2114 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2115 2116 /* Tell every 
processor the number of nonzeros per row */ 2117 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2118 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2119 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2120 displs = recvcounts + size; 2121 for (i = 0; i < size; i++) { 2122 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2123 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2124 } 2125 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2126 /* Create the sequential matrix of the same type as the local block diagonal */ 2127 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2128 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2129 PetscCall(MatSetType(B, MATSEQAIJ)); 2130 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2131 b = (Mat_SeqAIJ *)B->data; 2132 2133 /* Copy my part of matrix column indices over */ 2134 sendcount = ad->nz + bd->nz; 2135 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2136 a_jsendbuf = ad->j; 2137 b_jsendbuf = bd->j; 2138 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2139 cnt = 0; 2140 for (i = 0; i < n; i++) { 2141 /* put in lower diagonal portion */ 2142 m = bd->i[i + 1] - bd->i[i]; 2143 while (m > 0) { 2144 /* is it above diagonal (in bd (compressed) numbering) */ 2145 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2146 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2147 m--; 2148 } 2149 2150 /* put in diagonal portion */ 2151 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2152 2153 /* put in upper diagonal portion */ 2154 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2155 } 2156 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2157 2158 /* Gather all column indices to all processors */ 2159 for (i = 0; i < size; i++) { 2160 recvcounts[i] = 0; 2161 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2162 } 2163 displs[0] = 0; 2164 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2165 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2166 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2167 /* set the b->ilen (length of each row) values */ 2168 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2169 /* set the b->i indices */ 2170 b->i[0] = 0; 2171 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2172 PetscCall(PetscFree(lens)); 2173 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2174 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2175 PetscCall(PetscFree(recvcounts)); 2176 2177 PetscCall(MatPropagateSymmetryOptions(A, B)); 2178 *newmat = B; 2179 PetscFunctionReturn(PETSC_SUCCESS); 2180 } 2181 2182 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2183 { 2184 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2185 Vec bb1 = NULL; 2186 2187 PetscFunctionBegin; 2188 if (flag == SOR_APPLY_UPPER) { 2189 
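/* SOR_APPLY_UPPER is handled entirely by the local diagonal block, so no ghost-value communication is required; delegate directly to the sequential kernel and return */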
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2190 PetscFunctionReturn(PETSC_SUCCESS); 2191 } 2192 2193 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2194 2195 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2196 if (flag & SOR_ZERO_INITIAL_GUESS) { 2197 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2198 its--; 2199 } 2200 2201 while (its--) { 2202 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2203 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2204 2205 /* update rhs: bb1 = bb - B*x */ 2206 PetscCall(VecScale(mat->lvec, -1.0)); 2207 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2208 2209 /* local sweep */ 2210 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2211 } 2212 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2213 if (flag & SOR_ZERO_INITIAL_GUESS) { 2214 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2215 its--; 2216 } 2217 while (its--) { 2218 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2219 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2220 2221 /* update rhs: bb1 = bb - B*x */ 2222 PetscCall(VecScale(mat->lvec, -1.0)); 2223 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2224 2225 /* local sweep */ 2226 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2227 } 2228 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2229 if (flag & SOR_ZERO_INITIAL_GUESS) { 2230 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2231 its--; 2232 } 2233 while (its--) { 2234 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2235 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2236 2237 /* update rhs: bb1 = bb - B*x */ 2238 PetscCall(VecScale(mat->lvec, -1.0)); 2239 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2240 2241 /* local sweep */ 2242 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2243 } 2244 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2245 2246 PetscCall(VecDestroy(&bb1)); 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2251 { 2252 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2253 PetscInt m, N, i, *garray = aij->garray; 2254 PetscInt ib, jb, bs = A->rmap->bs; 2255 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2256 MatScalar *a_val = a_aij->a; 2257 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2258 MatScalar *b_val = b_aij->a; 2259 PetscReal *work; 2260 PetscMPIInt iN; 2261 2262 PetscFunctionBegin; 2263 PetscCall(MatGetSize(A, &m, &N)); 2264 PetscCall(PetscCalloc1(N, &work)); 2265 if (type == NORM_2) { 2266 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2267 for (jb = 0; jb < bs; jb++) { 2268 for (ib = 0; ib < bs; ib++) { 2269 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2270 a_val++; 2271 } 2272 } 2273 } 2274 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2275 for (jb = 0; jb < bs; jb++) { 2276 for (ib = 0; ib < bs; ib++) { 2277 
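/* garray[] maps the compressed block-column index of the off-diagonal part back to the global block column, so the contribution is accumulated into the correct entry of work[] */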
work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2278 b_val++; 2279 } 2280 } 2281 } 2282 } else if (type == NORM_1) { 2283 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2284 for (jb = 0; jb < bs; jb++) { 2285 for (ib = 0; ib < bs; ib++) { 2286 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2287 a_val++; 2288 } 2289 } 2290 } 2291 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2292 for (jb = 0; jb < bs; jb++) { 2293 for (ib = 0; ib < bs; ib++) { 2294 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2295 b_val++; 2296 } 2297 } 2298 } 2299 } else if (type == NORM_INFINITY) { 2300 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2301 for (jb = 0; jb < bs; jb++) { 2302 for (ib = 0; ib < bs; ib++) { 2303 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2304 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2305 a_val++; 2306 } 2307 } 2308 } 2309 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2310 for (jb = 0; jb < bs; jb++) { 2311 for (ib = 0; ib < bs; ib++) { 2312 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2313 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2314 b_val++; 2315 } 2316 } 2317 } 2318 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2319 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2320 for (jb = 0; jb < bs; jb++) { 2321 for (ib = 0; ib < bs; ib++) { 2322 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2323 a_val++; 2324 } 2325 } 2326 } 2327 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2328 for (jb = 0; jb < bs; jb++) { 2329 for (ib = 0; ib < bs; ib++) { 2330 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2331 b_val++; 2332 } 2333 } 2334 } 2335 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2336 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2337 for (jb = 0; jb < bs; jb++) { 2338 for (ib = 0; ib < bs; ib++) { 2339 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2340 a_val++; 2341 } 2342 } 2343 } 2344 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2345 for (jb = 0; jb < bs; jb++) { 2346 for (ib = 0; ib < bs; ib++) { 2347 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2348 b_val++; 2349 } 2350 } 2351 } 2352 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2353 PetscCall(PetscMPIIntCast(N, &iN)); 2354 if (type == NORM_INFINITY) { 2355 PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2356 } else { 2357 PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2358 } 2359 PetscCall(PetscFree(work)); 2360 if (type == NORM_2) { 2361 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2362 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2363 for (i = 0; i < N; i++) reductions[i] /= m; 2364 } 2365 PetscFunctionReturn(PETSC_SUCCESS); 2366 } 2367 2368 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2369 { 2370 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2371 2372 PetscFunctionBegin; 2373 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2374 A->factorerrortype = a->A->factorerrortype; 2375 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2376 
A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2377 PetscFunctionReturn(PETSC_SUCCESS); 2378 } 2379 2380 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2381 { 2382 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2383 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2384 2385 PetscFunctionBegin; 2386 if (!Y->preallocated) { 2387 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2388 } else if (!aij->nz) { 2389 PetscInt nonew = aij->nonew; 2390 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2391 aij->nonew = nonew; 2392 } 2393 PetscCall(MatShift_Basic(Y, a)); 2394 PetscFunctionReturn(PETSC_SUCCESS); 2395 } 2396 2397 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2398 { 2399 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2400 2401 PetscFunctionBegin; 2402 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2403 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2404 if (d) { 2405 PetscInt rstart; 2406 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2407 *d += rstart / A->rmap->bs; 2408 } 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2413 { 2414 PetscFunctionBegin; 2415 *a = ((Mat_MPIBAIJ *)A->data)->A; 2416 PetscFunctionReturn(PETSC_SUCCESS); 2417 } 2418 2419 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2420 { 2421 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2422 2423 PetscFunctionBegin; 2424 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2425 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2426 PetscFunctionReturn(PETSC_SUCCESS); 2427 } 2428 2429 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2430 MatGetRow_MPIBAIJ, 2431 MatRestoreRow_MPIBAIJ, 2432 MatMult_MPIBAIJ, 2433 /* 4*/ MatMultAdd_MPIBAIJ, 2434 MatMultTranspose_MPIBAIJ, 2435 MatMultTransposeAdd_MPIBAIJ, 2436 NULL, 2437 NULL, 2438 NULL, 2439 /*10*/ NULL, 2440 NULL, 2441 NULL, 2442 MatSOR_MPIBAIJ, 2443 MatTranspose_MPIBAIJ, 2444 /*15*/ MatGetInfo_MPIBAIJ, 2445 MatEqual_MPIBAIJ, 2446 MatGetDiagonal_MPIBAIJ, 2447 MatDiagonalScale_MPIBAIJ, 2448 MatNorm_MPIBAIJ, 2449 /*20*/ MatAssemblyBegin_MPIBAIJ, 2450 MatAssemblyEnd_MPIBAIJ, 2451 MatSetOption_MPIBAIJ, 2452 MatZeroEntries_MPIBAIJ, 2453 /*24*/ MatZeroRows_MPIBAIJ, 2454 NULL, 2455 NULL, 2456 NULL, 2457 NULL, 2458 /*29*/ MatSetUp_MPI_Hash, 2459 NULL, 2460 NULL, 2461 MatGetDiagonalBlock_MPIBAIJ, 2462 NULL, 2463 /*34*/ MatDuplicate_MPIBAIJ, 2464 NULL, 2465 NULL, 2466 NULL, 2467 NULL, 2468 /*39*/ MatAXPY_MPIBAIJ, 2469 MatCreateSubMatrices_MPIBAIJ, 2470 MatIncreaseOverlap_MPIBAIJ, 2471 MatGetValues_MPIBAIJ, 2472 MatCopy_MPIBAIJ, 2473 /*44*/ NULL, 2474 MatScale_MPIBAIJ, 2475 MatShift_MPIBAIJ, 2476 NULL, 2477 MatZeroRowsColumns_MPIBAIJ, 2478 /*49*/ NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 NULL, 2483 /*54*/ MatFDColoringCreate_MPIXAIJ, 2484 NULL, 2485 MatSetUnfactored_MPIBAIJ, 2486 MatPermute_MPIBAIJ, 2487 MatSetValuesBlocked_MPIBAIJ, 2488 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2489 MatDestroy_MPIBAIJ, 2490 MatView_MPIBAIJ, 2491 NULL, 2492 NULL, 2493 /*64*/ NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 NULL, 2498 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2499 NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 /*74*/ NULL, 2504 MatFDColoringApply_BAIJ, 2505 NULL, 2506 NULL, 2507 NULL, 2508 /*79*/ NULL, 2509 NULL, 2510 NULL, 2511 NULL, 2512 
MatLoad_MPIBAIJ, 2513 /*84*/ NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 /*89*/ NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 /*94*/ NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 /*99*/ NULL, 2529 NULL, 2530 NULL, 2531 MatConjugate_MPIBAIJ, 2532 NULL, 2533 /*104*/ NULL, 2534 MatRealPart_MPIBAIJ, 2535 MatImaginaryPart_MPIBAIJ, 2536 NULL, 2537 NULL, 2538 /*109*/ NULL, 2539 NULL, 2540 NULL, 2541 NULL, 2542 MatMissingDiagonal_MPIBAIJ, 2543 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2544 NULL, 2545 MatGetGhosts_MPIBAIJ, 2546 NULL, 2547 NULL, 2548 /*119*/ NULL, 2549 NULL, 2550 NULL, 2551 NULL, 2552 MatGetMultiProcBlock_MPIBAIJ, 2553 /*124*/ NULL, 2554 MatGetColumnReductions_MPIBAIJ, 2555 MatInvertBlockDiagonal_MPIBAIJ, 2556 NULL, 2557 NULL, 2558 /*129*/ NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 /*134*/ NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 NULL, 2568 /*139*/ MatSetBlockSizes_Default, 2569 NULL, 2570 NULL, 2571 MatFDColoringSetUp_MPIXAIJ, 2572 NULL, 2573 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2574 NULL, 2575 NULL, 2576 NULL, 2577 NULL, 2578 NULL, 2579 /*150*/ NULL, 2580 MatEliminateZeros_MPIBAIJ, 2581 MatGetRowSumAbs_MPIBAIJ, 2582 NULL, 2583 NULL, 2584 NULL}; 2585 2586 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2587 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2588 2589 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2590 { 2591 PetscInt m, rstart, cstart, cend; 2592 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2593 const PetscInt *JJ = NULL; 2594 PetscScalar *values = NULL; 2595 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2596 PetscBool nooffprocentries; 2597 2598 PetscFunctionBegin; 2599 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2600 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2601 PetscCall(PetscLayoutSetUp(B->rmap)); 2602 PetscCall(PetscLayoutSetUp(B->cmap)); 2603 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2604 m = B->rmap->n / bs; 2605 rstart = B->rmap->rstart / bs; 2606 cstart = B->cmap->rstart / bs; 2607 cend = B->cmap->rend / bs; 2608 2609 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2610 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2611 for (i = 0; i < m; i++) { 2612 nz = ii[i + 1] - ii[i]; 2613 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2614 nz_max = PetscMax(nz_max, nz); 2615 dlen = 0; 2616 olen = 0; 2617 JJ = jj + ii[i]; 2618 for (j = 0; j < nz; j++) { 2619 if (*JJ < cstart || *JJ >= cend) olen++; 2620 else dlen++; 2621 JJ++; 2622 } 2623 d_nnz[i] = dlen; 2624 o_nnz[i] = olen; 2625 } 2626 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2627 PetscCall(PetscFree2(d_nnz, o_nnz)); 2628 2629 values = (PetscScalar *)V; 2630 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2631 for (i = 0; i < m; i++) { 2632 PetscInt row = i + rstart; 2633 PetscInt ncols = ii[i + 1] - ii[i]; 2634 const PetscInt *icols = jj + ii[i]; 2635 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2636 const PetscScalar *svals = values + (V ? 
(bs * bs * ii[i]) : 0); 2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2638 } else { /* block ordering does not match so we can only insert one block at a time. */ 2639 PetscInt j; 2640 for (j = 0; j < ncols; j++) { 2641 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2642 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2643 } 2644 } 2645 } 2646 2647 if (!V) PetscCall(PetscFree(values)); 2648 nooffprocentries = B->nooffprocentries; 2649 B->nooffprocentries = PETSC_TRUE; 2650 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2651 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2652 B->nooffprocentries = nooffprocentries; 2653 2654 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2655 PetscFunctionReturn(PETSC_SUCCESS); 2656 } 2657 2658 /*@C 2659 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2660 2661 Collective 2662 2663 Input Parameters: 2664 + B - the matrix 2665 . bs - the block size 2666 . i - the indices into `j` for the start of each local row (starts with zero) 2667 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2668 - v - optional values in the matrix, use `NULL` if not provided 2669 2670 Level: advanced 2671 2672 Notes: 2673 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2674 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2675 called this routine. 2676 2677 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2678 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2679 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2680 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2681 block column and the second index is over columns within a block. 
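   For illustration, a minimal sketch of the expected layout (the indices below are hypothetical; it assumes `B` has already been created with the desired type and sizes, that `bs` is 2, and that the calling process owns two block rows whose nonzero blocks lie in global block columns {0,2} and {1}):
.vb
   PetscInt i[] = {0, 2, 3}; // i[r+1] - i[r] = number of nonzero blocks in local block row r
   PetscInt j[] = {0, 2, 1}; // global block column of each block, sorted within each row
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, 2, i, j, NULL)); // pass an array of 3*bs*bs values instead of NULL to also set numerical values
.ve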
2682 2683 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2684 2685 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2686 @*/ 2687 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2688 { 2689 PetscFunctionBegin; 2690 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2691 PetscValidType(B, 1); 2692 PetscValidLogicalCollectiveInt(B, bs, 2); 2693 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2698 { 2699 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2700 PetscInt i; 2701 PetscMPIInt size; 2702 2703 PetscFunctionBegin; 2704 if (B->hash_active) { 2705 B->ops[0] = b->cops; 2706 B->hash_active = PETSC_FALSE; 2707 } 2708 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2709 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2710 PetscCall(PetscLayoutSetUp(B->rmap)); 2711 PetscCall(PetscLayoutSetUp(B->cmap)); 2712 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2713 2714 if (d_nnz) { 2715 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2716 } 2717 if (o_nnz) { 2718 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2719 } 2720 2721 b->bs2 = bs * bs; 2722 b->mbs = B->rmap->n / bs; 2723 b->nbs = B->cmap->n / bs; 2724 b->Mbs = B->rmap->N / bs; 2725 b->Nbs = B->cmap->N / bs; 2726 2727 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2728 b->rstartbs = B->rmap->rstart / bs; 2729 b->rendbs = B->rmap->rend / bs; 2730 b->cstartbs = B->cmap->rstart / bs; 2731 b->cendbs = B->cmap->rend / bs; 2732 2733 #if defined(PETSC_USE_CTABLE) 2734 PetscCall(PetscHMapIDestroy(&b->colmap)); 2735 #else 2736 PetscCall(PetscFree(b->colmap)); 2737 #endif 2738 PetscCall(PetscFree(b->garray)); 2739 PetscCall(VecDestroy(&b->lvec)); 2740 PetscCall(VecScatterDestroy(&b->Mvctx)); 2741 2742 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2743 2744 MatSeqXAIJGetOptions_Private(b->B); 2745 PetscCall(MatDestroy(&b->B)); 2746 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2747 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2748 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2749 MatSeqXAIJRestoreOptions_Private(b->B); 2750 2751 MatSeqXAIJGetOptions_Private(b->A); 2752 PetscCall(MatDestroy(&b->A)); 2753 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2754 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2755 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2756 MatSeqXAIJRestoreOptions_Private(b->A); 2757 2758 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2759 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2760 B->preallocated = PETSC_TRUE; 2761 B->was_assembled = PETSC_FALSE; 2762 B->assembled = PETSC_FALSE; 2763 PetscFunctionReturn(PETSC_SUCCESS); 2764 } 2765 2766 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2767 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2768 2769 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2770 { 2771 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2772 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2773 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2774 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2775 2776 PetscFunctionBegin; 2777 PetscCall(PetscMalloc1(M + 1, &ii)); 2778 ii[0] = 0; 2779 for (i = 0; i < M; i++) { 2780 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2781 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2782 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2783 /* remove one from count of matrix has diagonal */ 2784 for (j = id[i]; j < id[i + 1]; j++) { 2785 if (jd[j] == i) { 2786 ii[i + 1]--; 2787 break; 2788 } 2789 } 2790 } 2791 PetscCall(PetscMalloc1(ii[M], &jj)); 2792 cnt = 0; 2793 for (i = 0; i < M; i++) { 2794 for (j = io[i]; j < io[i + 1]; j++) { 2795 if (garray[jo[j]] > rstart) break; 2796 jj[cnt++] = garray[jo[j]]; 2797 } 2798 for (k = id[i]; k < id[i + 1]; k++) { 2799 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2800 } 2801 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2802 } 2803 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2804 PetscFunctionReturn(PETSC_SUCCESS); 2805 } 2806 2807 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2808 2809 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2810 2811 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2812 { 2813 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2814 Mat_MPIAIJ *b; 2815 Mat B; 2816 2817 PetscFunctionBegin; 2818 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2819 2820 if (reuse == MAT_REUSE_MATRIX) { 2821 B = *newmat; 2822 } else { 2823 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2824 PetscCall(MatSetType(B, MATMPIAIJ)); 2825 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2826 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2827 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2828 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2829 } 2830 b = (Mat_MPIAIJ *)B->data; 2831 2832 if (reuse == MAT_REUSE_MATRIX) { 2833 
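/* reuse: the SeqAIJ blocks of the existing target matrix are overwritten in place, keeping their (compatible) nonzero pattern */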
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2835 } else { 2836 PetscInt *garray = a->garray; 2837 Mat_SeqAIJ *bB; 2838 PetscInt bs, nnz; 2839 PetscCall(MatDestroy(&b->A)); 2840 PetscCall(MatDestroy(&b->B)); 2841 /* just clear out the data structure */ 2842 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2843 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2844 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2845 2846 /* Global numbering for b->B columns */ 2847 bB = (Mat_SeqAIJ *)b->B->data; 2848 bs = A->rmap->bs; 2849 nnz = bB->i[A->rmap->n]; 2850 for (PetscInt k = 0; k < nnz; k++) { 2851 PetscInt bj = bB->j[k] / bs; 2852 PetscInt br = bB->j[k] % bs; 2853 bB->j[k] = garray[bj] * bs + br; 2854 } 2855 } 2856 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2857 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2858 2859 if (reuse == MAT_INPLACE_MATRIX) { 2860 PetscCall(MatHeaderReplace(A, &B)); 2861 } else { 2862 *newmat = B; 2863 } 2864 PetscFunctionReturn(PETSC_SUCCESS); 2865 } 2866 2867 /*MC 2868 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2869 2870 Options Database Keys: 2871 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2872 . -mat_block_size <bs> - set the blocksize used to store the matrix 2873 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2874 - -mat_use_hash_table <fact> - set hash table factor 2875 2876 Level: beginner 2877 2878 Note: 2879 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this no 2880 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2881 2882 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2883 M*/ 2884 2885 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2886 2887 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2888 { 2889 Mat_MPIBAIJ *b; 2890 PetscBool flg = PETSC_FALSE; 2891 2892 PetscFunctionBegin; 2893 PetscCall(PetscNew(&b)); 2894 B->data = (void *)b; 2895 B->ops[0] = MatOps_Values; 2896 B->assembled = PETSC_FALSE; 2897 2898 B->insertmode = NOT_SET_VALUES; 2899 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2900 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2901 2902 /* build local table of row and column ownerships */ 2903 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2904 2905 /* build cache for off array entries formed */ 2906 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2907 2908 b->donotstash = PETSC_FALSE; 2909 b->colmap = NULL; 2910 b->garray = NULL; 2911 b->roworiented = PETSC_TRUE; 2912 2913 /* stuff used in block assembly */ 2914 b->barray = NULL; 2915 2916 /* stuff used for matrix vector multiply */ 2917 b->lvec = NULL; 2918 b->Mvctx = NULL; 2919 2920 /* stuff for MatGetRow() */ 2921 b->rowindices = NULL; 2922 b->rowvalues = NULL; 2923 b->getrowactive = PETSC_FALSE; 2924 2925 /* hash table stuff */ 2926 b->ht = NULL; 2927 b->hd = NULL; 2928 b->ht_size = 0; 2929 b->ht_flag = PETSC_FALSE; 2930 b->ht_fact = 0; 2931 b->ht_total_ct = 0; 2932 b->ht_insert_ct = 0; 2933 2934 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2935 b->ijonly = PETSC_FALSE; 2936 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2940 #if defined(PETSC_HAVE_HYPRE) 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2942 #endif 2943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2947 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2949 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2950 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2951 2952 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2953 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2954 if (flg) { 2955 PetscReal fact = 1.39; 2956 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2957 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use 
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2958 if (fact <= 1.0) fact = 1.39; 2959 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2960 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2961 } 2962 PetscOptionsEnd(); 2963 PetscFunctionReturn(PETSC_SUCCESS); 2964 } 2965 2966 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2967 /*MC 2968 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2969 2970 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2971 and `MATMPIBAIJ` otherwise. 2972 2973 Options Database Keys: 2974 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2975 2976 Level: beginner 2977 2978 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2979 M*/ 2980 2981 /*@ 2982 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2983 (block compressed row). 2984 2985 Collective 2986 2987 Input Parameters: 2988 + B - the matrix 2989 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2990 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2991 . d_nz - number of block nonzeros per block row in diagonal portion of local 2992 submatrix (same for all local rows) 2993 . d_nnz - array containing the number of block nonzeros in the various block rows 2994 of the in diagonal portion of the local (possibly different for each block 2995 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2996 set it even if it is zero. 2997 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2998 submatrix (same for all local rows). 2999 - o_nnz - array containing the number of nonzeros in the various block rows of the 3000 off-diagonal portion of the local submatrix (possibly different for 3001 each block row) or `NULL`. 3002 3003 If the *_nnz parameter is given then the *_nz parameter is ignored 3004 3005 Options Database Keys: 3006 + -mat_block_size - size of the blocks to use 3007 - -mat_use_hash_table <fact> - set hash table factor 3008 3009 Level: intermediate 3010 3011 Notes: 3012 For good matrix assembly performance 3013 the user should preallocate the matrix storage by setting the parameters 3014 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3015 performance can be increased by more than a factor of 50. 3016 3017 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3018 than it must be used on all processors that share the object for that argument. 3019 3020 Storage Information: 3021 For a square global matrix we define each processor's diagonal portion 3022 to be its local rows and the corresponding columns (a square submatrix); 3023 each processor's off-diagonal portion encompasses the remainder of the 3024 local matrix (a rectangular submatrix). 3025 3026 The user can specify preallocated storage for the diagonal part of 3027 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3028 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3029 memory allocation. 
Likewise, specify preallocated storage for the 3030 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3031 3032 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3033 the figure below we depict these three local rows and all columns (0-11). 3034 3035 .vb 3036 0 1 2 3 4 5 6 7 8 9 10 11 3037 -------------------------- 3038 row 3 |o o o d d d o o o o o o 3039 row 4 |o o o d d d o o o o o o 3040 row 5 |o o o d d d o o o o o o 3041 -------------------------- 3042 .ve 3043 3044 Thus, any entries in the d locations are stored in the d (diagonal) 3045 submatrix, and any entries in the o locations are stored in the 3046 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3047 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3048 3049 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3050 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3051 In general, for PDE problems in which most nonzeros are near the diagonal, 3052 one expects `d_nz` >> `o_nz`. 3053 3054 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3055 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3056 You can also run with the option `-info` and look for messages with the string 3057 malloc in them to see if additional memory allocation was needed. 3058 3059 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3060 @*/ 3061 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3062 { 3063 PetscFunctionBegin; 3064 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3065 PetscValidType(B, 1); 3066 PetscValidLogicalCollectiveInt(B, bs, 2); 3067 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3068 PetscFunctionReturn(PETSC_SUCCESS); 3069 } 3070 3071 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3072 /*@ 3073 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3074 (block compressed row). 3075 3076 Collective 3077 3078 Input Parameters: 3079 + comm - MPI communicator 3080 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3081 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3082 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3083 This value should be the same as the local size used in creating the 3084 y vector for the matrix-vector product y = Ax. 3085 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3086 This value should be the same as the local size used in creating the 3087 x vector for the matrix-vector product y = Ax. 3088 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3089 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3090 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3091 submatrix (same for all local rows) 3092 . 
d_nnz - array containing the number of nonzero blocks in the various block rows
3093 in the diagonal portion of the local submatrix (possibly different for each block
3094 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3095 and set it even if it is zero.
3096 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3097 submatrix (same for all local rows).
3098 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3099 off-diagonal portion of the local submatrix (possibly different for
3100 each block row) or NULL.
3101
3102 Output Parameter:
3103 . A - the matrix
3104
3105 Options Database Keys:
3106 + -mat_block_size - size of the blocks to use
3107 - -mat_use_hash_table <fact> - set hash table factor
3108
3109 Level: intermediate
3110
3111 Notes:
3112 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3113 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3114 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3115
3116 For good matrix assembly performance
3117 the user should preallocate the matrix storage by setting the parameters
3118 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
3119 performance can be increased by more than a factor of 50.
3120
3121 If the *_nnz parameter is given then the *_nz parameter is ignored
3122
3123 A nonzero block is any block that has 1 or more nonzeros in it
3124
3125 The user MUST specify either the local or global matrix dimensions
3126 (possibly both).
3127
3128 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3129 then it must be used on all processors that share the object for that argument.
3130
3131 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3132 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3133
3134 Storage Information:
3135 For a square global matrix we define each processor's diagonal portion
3136 to be its local rows and the corresponding columns (a square submatrix);
3137 each processor's off-diagonal portion encompasses the remainder of the
3138 local matrix (a rectangular submatrix).
3139
3140 The user can specify preallocated storage for the diagonal part of
3141 the local submatrix with either d_nz or d_nnz (not both). Set
3142 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3143 memory allocation. Likewise, specify preallocated storage for the
3144 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3145
3146 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3147 the figure below we depict these three local rows and all columns (0-11).
3148
3149 .vb
3150 0 1 2 3 4 5 6 7 8 9 10 11
3151 --------------------------
3152 row 3 |o o o d d d o o o o o o
3153 row 4 |o o o d d d o o o o o o
3154 row 5 |o o o d d d o o o o o o
3155 --------------------------
3156 .ve
3157
3158 Thus, any entries in the d locations are stored in the d (diagonal)
3159 submatrix, and any entries in the o locations are stored in the
3160 o (off-diagonal) submatrix. Note that the d and the o submatrices are
3161 stored simply in the `MATSEQBAIJ` format for compressed row storage.
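   As a concrete (hedged) reading of the figure above, added here only for illustration: assuming the global matrix is 12 x 12 with block size 3 and four processes each owning one block row, the process shown has one diagonal block (columns 3-5) and three off-diagonal blocks (columns 0-2, 6-8 and 9-11), so a matching creation call could look like (`comm` being whatever communicator the matrix lives on)
.vb
   Mat A;
   PetscCall(MatCreateBAIJ(comm, 3, 3, 3, 12, 12, 1, NULL, 3, NULL, &A));
   /* ... insert blocks with MatSetValuesBlocked(), then assemble ... */
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
.ve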
3162 3163 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3164 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3165 In general, for PDE problems in which most nonzeros are near the diagonal, 3166 one expects `d_nz` >> `o_nz`. 3167 3168 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3169 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3170 @*/ 3171 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3172 { 3173 PetscMPIInt size; 3174 3175 PetscFunctionBegin; 3176 PetscCall(MatCreate(comm, A)); 3177 PetscCall(MatSetSizes(*A, m, n, M, N)); 3178 PetscCallMPI(MPI_Comm_size(comm, &size)); 3179 if (size > 1) { 3180 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3181 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3182 } else { 3183 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3184 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3185 } 3186 PetscFunctionReturn(PETSC_SUCCESS); 3187 } 3188 3189 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3190 { 3191 Mat mat; 3192 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3193 PetscInt len = 0; 3194 3195 PetscFunctionBegin; 3196 *newmat = NULL; 3197 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3198 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3199 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3200 3201 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3202 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3203 if (matin->hash_active) { 3204 PetscCall(MatSetUp(mat)); 3205 } else { 3206 mat->factortype = matin->factortype; 3207 mat->preallocated = PETSC_TRUE; 3208 mat->assembled = PETSC_TRUE; 3209 mat->insertmode = NOT_SET_VALUES; 3210 3211 a = (Mat_MPIBAIJ *)mat->data; 3212 mat->rmap->bs = matin->rmap->bs; 3213 a->bs2 = oldmat->bs2; 3214 a->mbs = oldmat->mbs; 3215 a->nbs = oldmat->nbs; 3216 a->Mbs = oldmat->Mbs; 3217 a->Nbs = oldmat->Nbs; 3218 3219 a->size = oldmat->size; 3220 a->rank = oldmat->rank; 3221 a->donotstash = oldmat->donotstash; 3222 a->roworiented = oldmat->roworiented; 3223 a->rowindices = NULL; 3224 a->rowvalues = NULL; 3225 a->getrowactive = PETSC_FALSE; 3226 a->barray = NULL; 3227 a->rstartbs = oldmat->rstartbs; 3228 a->rendbs = oldmat->rendbs; 3229 a->cstartbs = oldmat->cstartbs; 3230 a->cendbs = oldmat->cendbs; 3231 3232 /* hash table stuff */ 3233 a->ht = NULL; 3234 a->hd = NULL; 3235 a->ht_size = 0; 3236 a->ht_flag = oldmat->ht_flag; 3237 a->ht_fact = oldmat->ht_fact; 3238 a->ht_total_ct = 0; 3239 a->ht_insert_ct = 0; 3240 3241 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3242 if (oldmat->colmap) { 3243 #if defined(PETSC_USE_CTABLE) 3244 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3245 #else 3246 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3247 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3248 #endif 3249 } else a->colmap = NULL; 3250 3251 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3252 PetscCall(PetscMalloc1(len, &a->garray)); 3253 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3254 } else 
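    /* the original matrix has no off-process block columns recorded, so there is no garray to copy */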
a->garray = NULL; 3255 3256 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3257 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3258 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3259 3260 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3261 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3262 } 3263 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3264 *newmat = mat; 3265 PetscFunctionReturn(PETSC_SUCCESS); 3266 } 3267 3268 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3269 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3270 { 3271 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3272 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3273 PetscScalar *matvals; 3274 3275 PetscFunctionBegin; 3276 PetscCall(PetscViewerSetUp(viewer)); 3277 3278 /* read in matrix header */ 3279 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3280 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3281 M = header[1]; 3282 N = header[2]; 3283 nz = header[3]; 3284 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3285 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3286 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3287 3288 /* set block sizes from the viewer's .info file */ 3289 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3290 /* set local sizes if not set already */ 3291 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3292 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3293 /* set global sizes if not set already */ 3294 if (mat->rmap->N < 0) mat->rmap->N = M; 3295 if (mat->cmap->N < 0) mat->cmap->N = N; 3296 PetscCall(PetscLayoutSetUp(mat->rmap)); 3297 PetscCall(PetscLayoutSetUp(mat->cmap)); 3298 3299 /* check if the matrix sizes are correct */ 3300 PetscCall(MatGetSize(mat, &rows, &cols)); 3301 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3302 PetscCall(MatGetBlockSize(mat, &bs)); 3303 PetscCall(MatGetLocalSize(mat, &m, &n)); 3304 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3305 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3306 mbs = m / bs; 3307 nbs = n / bs; 3308 3309 /* read in row lengths and build row indices */ 3310 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3311 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3312 rowidxs[0] = 0; 3313 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3314 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3315 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3316 3317 /* read in column indices and matrix values */ 3318 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3319 PetscCall(PetscViewerBinaryReadAll(viewer, 
colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3320 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3321 3322 { /* preallocate matrix storage */ 3323 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3324 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3325 PetscBool sbaij, done; 3326 PetscInt *d_nnz, *o_nnz; 3327 3328 PetscCall(PetscBTCreate(nbs, &bt)); 3329 PetscCall(PetscHSetICreate(&ht)); 3330 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3331 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3332 for (i = 0; i < mbs; i++) { 3333 PetscCall(PetscBTMemzero(nbs, bt)); 3334 PetscCall(PetscHSetIClear(ht)); 3335 for (k = 0; k < bs; k++) { 3336 PetscInt row = bs * i + k; 3337 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3338 PetscInt col = colidxs[j]; 3339 if (!sbaij || col >= row) { 3340 if (col >= cs && col < ce) { 3341 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3342 } else { 3343 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3344 if (done) o_nnz[i]++; 3345 } 3346 } 3347 } 3348 } 3349 } 3350 PetscCall(PetscBTDestroy(&bt)); 3351 PetscCall(PetscHSetIDestroy(&ht)); 3352 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3353 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3354 PetscCall(PetscFree2(d_nnz, o_nnz)); 3355 } 3356 3357 /* store matrix values */ 3358 for (i = 0; i < m; i++) { 3359 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3360 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3361 } 3362 3363 PetscCall(PetscFree(rowidxs)); 3364 PetscCall(PetscFree2(colidxs, matvals)); 3365 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3366 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3367 PetscFunctionReturn(PETSC_SUCCESS); 3368 } 3369 3370 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3371 { 3372 PetscBool isbinary; 3373 3374 PetscFunctionBegin; 3375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3376 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3377 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3378 PetscFunctionReturn(PETSC_SUCCESS); 3379 } 3380 3381 /*@ 3382 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3383 3384 Input Parameters: 3385 + mat - the matrix 3386 - fact - factor 3387 3388 Options Database Key: 3389 . 
-mat_use_hash_table <fact> - provide the factor 3390 3391 Level: advanced 3392 3393 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3394 @*/ 3395 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3396 { 3397 PetscFunctionBegin; 3398 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } 3401 3402 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3403 { 3404 Mat_MPIBAIJ *baij; 3405 3406 PetscFunctionBegin; 3407 baij = (Mat_MPIBAIJ *)mat->data; 3408 baij->ht_fact = fact; 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } 3411 3412 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3413 { 3414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3415 PetscBool flg; 3416 3417 PetscFunctionBegin; 3418 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3419 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3420 if (Ad) *Ad = a->A; 3421 if (Ao) *Ao = a->B; 3422 if (colmap) *colmap = a->garray; 3423 PetscFunctionReturn(PETSC_SUCCESS); 3424 } 3425 3426 /* 3427 Special version for direct calls from Fortran (to eliminate two function call overheads 3428 */ 3429 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3430 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3431 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3432 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3433 #endif 3434 3435 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3436 /*@C 3437 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3438 3439 Collective 3440 3441 Input Parameters: 3442 + matin - the matrix 3443 . min - number of input rows 3444 . im - input rows 3445 . nin - number of input columns 3446 . in - input columns 3447 . v - numerical values input 3448 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3449 3450 Level: advanced 3451 3452 Developer Notes: 3453 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 
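   For orientation, a minimal sketch (not taken from the manual page; `mat`, `row`, `col`, and the block values are assumed to be set up by the caller) of the standard C entry point that this Fortran shortcut mirrors, inserting one 2x2 block when bs = 2:
.vb
   PetscInt    row, col;   /* global block row and block column indices */
   PetscScalar vblock[4];  /* the bs*bs entries of one block */
   PetscCall(MatSetValuesBlocked(mat, 1, &row, 1, &col, vblock, INSERT_VALUES));
.ve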
3454 3455 .seealso: `Mat`, `MatSetValuesBlocked()` 3456 @*/ 3457 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3458 { 3459 /* convert input arguments to C version */ 3460 Mat mat = *matin; 3461 PetscInt m = *min, n = *nin; 3462 InsertMode addv = *addvin; 3463 3464 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3465 const MatScalar *value; 3466 MatScalar *barray = baij->barray; 3467 PetscBool roworiented = baij->roworiented; 3468 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3469 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3470 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3471 3472 PetscFunctionBegin; 3473 /* tasks normally handled by MatSetValuesBlocked() */ 3474 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3475 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3476 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3477 if (mat->assembled) { 3478 mat->was_assembled = PETSC_TRUE; 3479 mat->assembled = PETSC_FALSE; 3480 } 3481 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3482 3483 if (!barray) { 3484 PetscCall(PetscMalloc1(bs2, &barray)); 3485 baij->barray = barray; 3486 } 3487 3488 if (roworiented) stepval = (n - 1) * bs; 3489 else stepval = (m - 1) * bs; 3490 3491 for (i = 0; i < m; i++) { 3492 if (im[i] < 0) continue; 3493 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3494 if (im[i] >= rstart && im[i] < rend) { 3495 row = im[i] - rstart; 3496 for (j = 0; j < n; j++) { 3497 /* If NumCol = 1 then a copy is not required */ 3498 if ((roworiented) && (n == 1)) { 3499 barray = (MatScalar *)v + i * bs2; 3500 } else if ((!roworiented) && (m == 1)) { 3501 barray = (MatScalar *)v + j * bs2; 3502 } else { /* Here a copy is required */ 3503 if (roworiented) { 3504 value = v + i * (stepval + bs) * bs + j * bs; 3505 } else { 3506 value = v + j * (stepval + bs) * bs + i * bs; 3507 } 3508 for (ii = 0; ii < bs; ii++, value += stepval) { 3509 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3510 } 3511 barray -= bs2; 3512 } 3513 3514 if (in[j] >= cstart && in[j] < cend) { 3515 col = in[j] - cstart; 3516 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3517 } else if (in[j] < 0) { 3518 continue; 3519 } else { 3520 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3521 if (mat->was_assembled) { 3522 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3523 3524 #if defined(PETSC_USE_DEBUG) 3525 #if defined(PETSC_USE_CTABLE) 3526 { 3527 PetscInt data; 3528 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3529 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3530 } 3531 #else 3532 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3533 #endif 3534 #endif 3535 #if defined(PETSC_USE_CTABLE) 3536 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3537 col = (col - 1) / bs; 3538 #else 3539 col = (baij->colmap[in[j]] - 1) / bs; 3540 #endif 3541 if (col < 0 && 
!((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3542 PetscCall(MatDisAssemble_MPIBAIJ(mat));
3543 col = in[j];
3544 }
3545 } else col = in[j];
3546 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3547 }
3548 }
3549 } else {
3550 if (!baij->donotstash) {
3551 if (roworiented) {
3552 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3553 } else {
3554 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3555 }
3556 }
3557 }
3558 }
3559
3560 /* task normally handled by MatSetValuesBlocked() */
3561 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3562 PetscFunctionReturn(PETSC_SUCCESS);
3563 }
3564
3565 /*@
3566 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain, in standard block CSR format, the local rows.
3567
3568 Collective
3569
3570 Input Parameters:
3571 + comm - MPI communicator
3572 . bs - the block size, only a block size of 1 is supported
3573 . m - number of local rows (Cannot be `PETSC_DECIDE`)
3574 . n - This value should be the same as the local size used in creating the
3575 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3576 calculated if `N` is given) For square matrices `n` is almost always `m`.
3577 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3578 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3579 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block entries in block row `row-1` of the matrix
3580 . j - column indices
3581 - a - matrix values
3582
3583 Output Parameter:
3584 . mat - the matrix
3585
3586 Level: intermediate
3587
3588 Notes:
3589 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3590 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3591 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3592
3593 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3594 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3595 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
3596 with column-major ordering within blocks.
3597
3598 The `i` and `j` indices are 0 based; the entries of `i` are offsets into the local `j` (and `a`) arrays.
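   Example Usage (a hedged sketch added for illustration; the process count, sizes, and values are assumptions, not part of the original page). With `bs` = 1 and two MPI processes, suppose rank 0 owns the first two rows of a 4 x 4 matrix and those rows are
.vb
   1 0 0 0
   2 0 3 0
.ve
   then rank 0 would pass
.vb
   PetscInt    i[] = {0, 1, 3};       /* row offsets into j[] and a[] */
   PetscInt    j[] = {0, 0, 2};       /* column indices, row by row */
   PetscScalar a[] = {1.0, 2.0, 3.0}; /* values matching j[] */
   Mat         A;
   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, 4, 4, i, j, a, &A));
.ve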
3599
3600 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3601 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3602 @*/
3603 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3604 {
3605 PetscFunctionBegin;
3606 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3607 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3608 PetscCall(MatCreate(comm, mat));
3609 PetscCall(MatSetSizes(*mat, m, n, M, N));
3610 PetscCall(MatSetType(*mat, MATMPIBAIJ));
3611 PetscCall(MatSetBlockSize(*mat, bs));
3612 PetscCall(MatSetUp(*mat));
3613 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3614 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3615 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3616 PetscFunctionReturn(PETSC_SUCCESS);
3617 }
3618
3619 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3620 {
3621 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs;
3622 PetscInt *indx;
3623 PetscScalar *values;
3624
3625 PetscFunctionBegin;
3626 PetscCall(MatGetSize(inmat, &m, &N));
3627 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3629 PetscInt *dnz, *onz, mbs, Nbs, nbs;
3630 PetscInt *bindx, rmax = a->rmax, j;
3631 PetscMPIInt rank, size;
3632
3633 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3634 mbs = m / bs;
3635 Nbs = N / cbs;
3636 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3637 nbs = n / cbs;
3638
3639 PetscCall(PetscMalloc1(rmax, &bindx));
3640 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3641
3642 PetscCallMPI(MPI_Comm_rank(comm, &rank));
3643 PetscCallMPI(MPI_Comm_size(comm, &size));
3644 if (rank == size - 1) {
3645 /* Check sum(nbs) = Nbs */
3646 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3647 }
3648
3649 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3650 for (i = 0; i < mbs; i++) {
3651 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3652 nnz = nnz / bs;
3653 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3654 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3655 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3656 }
3657 PetscCall(PetscFree(bindx));
3658
3659 PetscCall(MatCreate(comm, outmat));
3660 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3661 PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3662 PetscCall(MatSetType(*outmat, MATBAIJ));
3663 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3664 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3665 MatPreallocateEnd(dnz, onz);
3666 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3667 }
3668
3669 /* numeric phase */
3670 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3671 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3672
3673 for (i = 0; i < m; i++) {
3674
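    /* copy each local row of the sequential matrix into the parallel matrix, shifted to this process's global row range */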
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3675 Ii = i + rstart; 3676 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3677 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3678 } 3679 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3680 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3681 PetscFunctionReturn(PETSC_SUCCESS); 3682 } 3683