1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)(B)->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp, *tmp2; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 721 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 722 v = amat->a; 723 jj = amat->j; 724 for (i = 0; i < amat->nz; i++) { 725 for (j = 0; j < bs; j++) { 726 col = bs * (cstart + *jj) + j; /* column index */ 727 for (row = 0; row < bs; row++) { 728 tmp[col] += PetscAbsScalar(*v); 729 v++; 730 } 731 } 732 jj++; 733 } 734 v = bmat->a; 735 jj = bmat->j; 736 for (i = 0; i < bmat->nz; i++) { 737 for (j = 0; j < bs; j++) { 738 col = bs * garray[*jj] + j; 739 for (row = 0; row < bs; row++) { 740 tmp[col] += PetscAbsScalar(*v); 741 v++; 742 } 743 } 744 jj++; 745 } 746 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 747 *nrm = 0.0; 748 for (j = 0; j < mat->cmap->N; j++) { 749 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 750 } 751 PetscCall(PetscFree(tmp)); 752 PetscCall(PetscFree(tmp2)); 753 } else if (type == NORM_INFINITY) { /* max row sum */ 754 PetscReal *sums; 755 PetscCall(PetscMalloc1(bs, &sums)); 756 sum = 0.0; 757 for (j = 0; j < amat->mbs; j++) { 758 for (row = 0; row < bs; row++) sums[row] = 0.0; 759 v = amat->a + bs2 * amat->i[j]; 760 nz = amat->i[j + 1] - amat->i[j]; 761 for (i = 0; i < nz; i++) { 762 for (col = 0; col < bs; col++) { 763 for (row = 0; row < bs; row++) { 764 sums[row] += PetscAbsScalar(*v); 765 v++; 766 } 767 } 768 } 769 v = bmat->a + bs2 * bmat->i[j]; 770 nz = bmat->i[j + 1] - bmat->i[j]; 771 for (i = 0; i < nz; i++) { 772 for (col = 0; col < bs; col++) { 773 for (row = 0; row < bs; row++) { 774 sums[row] += PetscAbsScalar(*v); 775 v++; 776 } 777 } 778 } 779 for (row = 0; row < bs; row++) { 780 if (sums[row] > sum) sum = sums[row]; 781 } 782 } 783 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 784 PetscCall(PetscFree(sums)); 785 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 786 } 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 /* 791 Creates the hash table, and sets the table 792 This table is created only once. 793 If new entried need to be added to the matrix 794 then the hash table has to be destroyed and 795 recreated. 796 */ 797 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 800 Mat A = baij->A, B = baij->B; 801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 802 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 803 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 804 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 805 PetscInt *HT, key; 806 MatScalar **HD; 807 PetscReal tmp; 808 #if defined(PETSC_USE_INFO) 809 PetscInt ct = 0, max = 0; 810 #endif 811 812 PetscFunctionBegin; 813 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 814 815 baij->ht_size = (PetscInt)(factor * nz); 816 ht_size = baij->ht_size; 817 818 /* Allocate Memory for Hash Table */ 819 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 820 HD = baij->hd; 821 HT = baij->ht; 822 823 /* Loop Over A */ 824 for (i = 0; i < a->mbs; i++) { 825 for (j = ai[i]; j < ai[i + 1]; j++) { 826 row = i + rstart; 827 col = aj[j] + cstart; 828 829 key = row * Nbs + col + 1; 830 h1 = HASH(ht_size, key, tmp); 831 for (k = 0; k < ht_size; k++) { 832 if (!HT[(h1 + k) % ht_size]) { 833 HT[(h1 + k) % ht_size] = key; 834 HD[(h1 + k) % ht_size] = a->a + j * bs2; 835 break; 836 #if defined(PETSC_USE_INFO) 837 } else { 838 ct++; 839 #endif 840 } 841 } 842 #if defined(PETSC_USE_INFO) 843 if (k > max) max = k; 844 #endif 845 } 846 } 847 /* Loop Over B */ 848 for (i = 0; i < b->mbs; i++) { 849 for (j = bi[i]; j < bi[i + 1]; j++) { 850 row = i + rstart; 851 col = garray[bj[j]]; 852 key = row * Nbs + col + 1; 853 h1 = HASH(ht_size, key, tmp); 854 for (k = 0; k < ht_size; k++) { 855 if (!HT[(h1 + k) % ht_size]) { 856 HT[(h1 + k) % ht_size] = key; 857 HD[(h1 + k) % ht_size] = b->a + j * bs2; 858 break; 859 #if defined(PETSC_USE_INFO) 860 } else { 861 ct++; 862 #endif 863 } 864 } 865 #if defined(PETSC_USE_INFO) 866 if (k > max) max = k; 867 #endif 868 } 869 } 870 871 /* Print Summary */ 872 #if defined(PETSC_USE_INFO) 873 for (i = 0, j = 0; i < ht_size; i++) { 874 if (HT[i]) j++; 875 } 876 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 877 #endif 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 882 { 883 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 884 PetscInt nstash, reallocs; 885 886 PetscFunctionBegin; 887 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 888 889 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 890 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 891 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 892 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 893 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 899 { 900 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 902 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 903 PetscInt *row, *col; 904 PetscBool r1, r2, r3, other_disassembled; 905 MatScalar *val; 906 PetscMPIInt n; 907 908 PetscFunctionBegin; 909 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 910 if (!baij->donotstash && !mat->nooffprocentries) { 911 while (1) { 912 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 913 if (!flg) break; 914 915 for (i = 0; i < n;) { 916 /* Now identify the consecutive vals belonging to the same row */ 917 for (j = i, rstart = row[j]; j < n; j++) { 918 if (row[j] != rstart) break; 919 } 920 if (j < n) ncols = j - i; 921 else ncols = n - i; 922 /* Now assemble all these values with a single function call */ 923 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 924 i = j; 925 } 926 } 927 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 928 /* Now process the block-stash. Since the values are stashed column-oriented, 929 set the row-oriented flag to column-oriented, and after MatSetValues() 930 restore the original flags */ 931 r1 = baij->roworiented; 932 r2 = a->roworiented; 933 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 934 935 baij->roworiented = PETSC_FALSE; 936 a->roworiented = PETSC_FALSE; 937 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 938 while (1) { 939 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 940 if (!flg) break; 941 942 for (i = 0; i < n;) { 943 /* Now identify the consecutive vals belonging to the same row */ 944 for (j = i, rstart = row[j]; j < n; j++) { 945 if (row[j] != rstart) break; 946 } 947 if (j < n) ncols = j - i; 948 else ncols = n - i; 949 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 950 i = j; 951 } 952 } 953 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 954 955 baij->roworiented = r1; 956 a->roworiented = r2; 957 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 958 } 959 960 PetscCall(MatAssemblyBegin(baij->A, mode)); 961 PetscCall(MatAssemblyEnd(baij->A, mode)); 962 963 /* determine if any processor has disassembled, if so we must 964 also disassemble ourselves, in order that we may reassemble. */ 965 /* 966 if nonzero structure of submatrix B cannot change then we know that 967 no processor disassembled thus we can skip this stuff 968 */ 969 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 970 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 971 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 972 } 973 974 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 975 PetscCall(MatAssemblyBegin(baij->B, mode)); 976 PetscCall(MatAssemblyEnd(baij->B, mode)); 977 978 #if defined(PETSC_USE_INFO) 979 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 980 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 981 982 baij->ht_total_ct = 0; 983 baij->ht_insert_ct = 0; 984 } 985 #endif 986 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 987 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 988 989 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 990 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 991 } 992 993 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 994 995 baij->rowvalues = NULL; 996 997 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 998 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 999 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1000 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1001 } 1002 PetscFunctionReturn(PETSC_SUCCESS); 1003 } 1004 1005 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1006 #include <petscdraw.h> 1007 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1008 { 1009 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1010 PetscMPIInt rank = baij->rank; 1011 PetscInt bs = mat->rmap->bs; 1012 PetscBool iascii, isdraw; 1013 PetscViewer sviewer; 1014 PetscViewerFormat format; 1015 1016 PetscFunctionBegin; 1017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1019 if (iascii) { 1020 PetscCall(PetscViewerGetFormat(viewer, &format)); 1021 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1022 MatInfo info; 1023 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1024 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1025 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1026 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1027 mat->rmap->bs, (double)info.memory)); 1028 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1030 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1032 PetscCall(PetscViewerFlush(viewer)); 1033 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1034 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1035 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1038 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 } 1044 1045 if (isdraw) { 1046 PetscDraw draw; 1047 PetscBool isnull; 1048 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1049 PetscCall(PetscDrawIsNull(draw, &isnull)); 1050 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1051 } 1052 1053 { 1054 /* assemble the entire matrix onto first processor. */ 1055 Mat A; 1056 Mat_SeqBAIJ *Aloc; 1057 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1058 MatScalar *a; 1059 const char *matname; 1060 1061 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1062 /* Perhaps this should be the type of mat? */ 1063 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1064 if (rank == 0) { 1065 PetscCall(MatSetSizes(A, M, N, M, N)); 1066 } else { 1067 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1068 } 1069 PetscCall(MatSetType(A, MATMPIBAIJ)); 1070 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1071 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1072 1073 /* copy over the A part */ 1074 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1075 ai = Aloc->i; 1076 aj = Aloc->j; 1077 a = Aloc->a; 1078 PetscCall(PetscMalloc1(bs, &rvals)); 1079 1080 for (i = 0; i < mbs; i++) { 1081 rvals[0] = bs * (baij->rstartbs + i); 1082 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1083 for (j = ai[i]; j < ai[i + 1]; j++) { 1084 col = (baij->cstartbs + aj[j]) * bs; 1085 for (k = 0; k < bs; k++) { 1086 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1087 col++; 1088 a += bs; 1089 } 1090 } 1091 } 1092 /* copy over the B part */ 1093 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1094 ai = Aloc->i; 1095 aj = Aloc->j; 1096 a = Aloc->a; 1097 for (i = 0; i < mbs; i++) { 1098 rvals[0] = bs * (baij->rstartbs + i); 1099 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1100 for (j = ai[i]; j < ai[i + 1]; j++) { 1101 col = baij->garray[aj[j]] * bs; 1102 for (k = 0; k < bs; k++) { 1103 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1104 col++; 1105 a += bs; 1106 } 1107 } 1108 } 1109 PetscCall(PetscFree(rvals)); 1110 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1111 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1112 /* 1113 Everyone has to call to draw the matrix since the graphics waits are 1114 synchronized across all processors that share the PetscDraw object 1115 */ 1116 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1117 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1118 if (rank == 0) { 1119 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1120 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1121 } 1122 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1123 PetscCall(MatDestroy(&A)); 1124 } 1125 PetscFunctionReturn(PETSC_SUCCESS); 1126 } 1127 1128 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1129 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1130 { 1131 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1132 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1133 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1134 const PetscInt *garray = aij->garray; 1135 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1136 PetscInt64 nz, hnz; 1137 PetscInt *rowlens, *colidxs; 1138 PetscScalar *matvals; 1139 PetscMPIInt rank; 1140 1141 PetscFunctionBegin; 1142 PetscCall(PetscViewerSetUp(viewer)); 1143 1144 M = mat->rmap->N; 1145 N = mat->cmap->N; 1146 m = mat->rmap->n; 1147 rs = mat->rmap->rstart; 1148 cs = mat->cmap->rstart; 1149 bs = mat->rmap->bs; 1150 nz = bs * bs * (A->nz + B->nz); 1151 1152 /* write matrix header */ 1153 header[0] = MAT_FILE_CLASSID; 1154 header[1] = M; 1155 header[2] = N; 1156 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1157 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1158 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1159 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1160 1161 /* fill in and store row lengths */ 1162 PetscCall(PetscMalloc1(m, &rowlens)); 1163 for (cnt = 0, i = 0; i < A->mbs; i++) 1164 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1165 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1166 PetscCall(PetscFree(rowlens)); 1167 1168 /* fill in and store column indices */ 1169 PetscCall(PetscMalloc1(nz, &colidxs)); 1170 for (cnt = 0, i = 0; i < A->mbs; i++) { 1171 for (k = 0; k < bs; k++) { 1172 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1173 if (garray[B->j[jb]] > cs / bs) break; 1174 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1175 } 1176 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1178 for (; jb < B->i[i + 1]; jb++) 1179 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1180 } 1181 } 1182 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1183 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1184 PetscCall(PetscFree(colidxs)); 1185 1186 /* fill in and store nonzero values */ 1187 PetscCall(PetscMalloc1(nz, &matvals)); 1188 for (cnt = 0, i = 0; i < A->mbs; i++) { 1189 for (k = 0; k < bs; k++) { 1190 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1191 if (garray[B->j[jb]] > cs / bs) break; 1192 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1193 } 1194 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1195 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1196 for (; jb < B->i[i + 1]; jb++) 1197 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1198 } 1199 } 1200 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1201 PetscCall(PetscFree(matvals)); 1202 1203 /* write block size option to the viewer's .info file */ 1204 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1205 PetscFunctionReturn(PETSC_SUCCESS); 1206 } 1207 1208 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1209 { 1210 PetscBool iascii, isdraw, issocket, isbinary; 1211 1212 PetscFunctionBegin; 1213 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1214 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1217 if (iascii || isdraw || issocket) { 1218 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1219 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1224 { 1225 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1226 PetscInt nt; 1227 1228 PetscFunctionBegin; 1229 PetscCall(VecGetLocalSize(xx, &nt)); 1230 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1231 PetscCall(VecGetLocalSize(yy, &nt)); 1232 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1233 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1234 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1235 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1237 PetscFunctionReturn(PETSC_SUCCESS); 1238 } 1239 1240 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1241 { 1242 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1243 1244 PetscFunctionBegin; 1245 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1246 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1247 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1255 1256 PetscFunctionBegin; 1257 /* do nondiagonal part */ 1258 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1259 /* do local part */ 1260 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1261 /* add partial results together */ 1262 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1263 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 1267 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1270 1271 PetscFunctionBegin; 1272 /* do nondiagonal part */ 1273 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1274 /* do local part */ 1275 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1276 /* add partial results together */ 1277 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1278 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /* 1283 This only works correctly for square matrices where the subblock A->A is the 1284 diagonal block 1285 */ 1286 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1287 { 1288 PetscFunctionBegin; 1289 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1290 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1291 PetscFunctionReturn(PETSC_SUCCESS); 1292 } 1293 1294 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1295 { 1296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1297 1298 PetscFunctionBegin; 1299 PetscCall(MatScale(a->A, aa)); 1300 PetscCall(MatScale(a->B, aa)); 1301 PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 1304 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1305 { 1306 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1307 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1308 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1309 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1310 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1311 1312 PetscFunctionBegin; 1313 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1314 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1315 mat->getrowactive = PETSC_TRUE; 1316 1317 if (!mat->rowvalues && (idx || v)) { 1318 /* 1319 allocate enough space to hold information from the longest row. 1320 */ 1321 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1322 PetscInt max = 1, mbs = mat->mbs, tmp; 1323 for (i = 0; i < mbs; i++) { 1324 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1325 if (max < tmp) max = tmp; 1326 } 1327 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1328 } 1329 lrow = row - brstart; 1330 1331 pvA = &vworkA; 1332 pcA = &cworkA; 1333 pvB = &vworkB; 1334 pcB = &cworkB; 1335 if (!v) { 1336 pvA = NULL; 1337 pvB = NULL; 1338 } 1339 if (!idx) { 1340 pcA = NULL; 1341 if (!v) pcB = NULL; 1342 } 1343 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1344 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1345 nztot = nzA + nzB; 1346 1347 cmap = mat->garray; 1348 if (v || idx) { 1349 if (nztot) { 1350 /* Sort by increasing column numbers, assuming A and B already sorted */ 1351 PetscInt imark = -1; 1352 if (v) { 1353 *v = v_p = mat->rowvalues; 1354 for (i = 0; i < nzB; i++) { 1355 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1356 else break; 1357 } 1358 imark = i; 1359 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1360 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1361 } 1362 if (idx) { 1363 *idx = idx_p = mat->rowindices; 1364 if (imark > -1) { 1365 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1366 } else { 1367 for (i = 0; i < nzB; i++) { 1368 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 else break; 1370 } 1371 imark = i; 1372 } 1373 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1374 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1375 } 1376 } else { 1377 if (idx) *idx = NULL; 1378 if (v) *v = NULL; 1379 } 1380 } 1381 *nz = nztot; 1382 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1383 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1388 { 1389 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1390 1391 PetscFunctionBegin; 1392 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1393 baij->getrowactive = PETSC_FALSE; 1394 PetscFunctionReturn(PETSC_SUCCESS); 1395 } 1396 1397 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1398 { 1399 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1400 1401 PetscFunctionBegin; 1402 PetscCall(MatZeroEntries(l->A)); 1403 PetscCall(MatZeroEntries(l->B)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1408 { 1409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1410 Mat A = a->A, B = a->B; 1411 PetscLogDouble isend[5], irecv[5]; 1412 1413 PetscFunctionBegin; 1414 info->block_size = (PetscReal)matin->rmap->bs; 1415 1416 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1417 1418 isend[0] = info->nz_used; 1419 isend[1] = info->nz_allocated; 1420 isend[2] = info->nz_unneeded; 1421 isend[3] = info->memory; 1422 isend[4] = info->mallocs; 1423 1424 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1425 1426 isend[0] += info->nz_used; 1427 isend[1] += info->nz_allocated; 1428 isend[2] += info->nz_unneeded; 1429 isend[3] += info->memory; 1430 isend[4] += info->mallocs; 1431 1432 if (flag == MAT_LOCAL) { 1433 info->nz_used = isend[0]; 1434 info->nz_allocated = isend[1]; 1435 info->nz_unneeded = isend[2]; 1436 info->memory = isend[3]; 1437 info->mallocs = isend[4]; 1438 } else if (flag == MAT_GLOBAL_MAX) { 1439 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1440 1441 info->nz_used = irecv[0]; 1442 info->nz_allocated = irecv[1]; 1443 info->nz_unneeded = irecv[2]; 1444 info->memory = irecv[3]; 1445 info->mallocs = irecv[4]; 1446 } else if (flag == MAT_GLOBAL_SUM) { 1447 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1448 1449 info->nz_used = irecv[0]; 1450 info->nz_allocated = irecv[1]; 1451 info->nz_unneeded = irecv[2]; 1452 info->memory = irecv[3]; 1453 info->mallocs = irecv[4]; 1454 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1455 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1462 { 1463 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1464 1465 PetscFunctionBegin; 1466 switch (op) { 1467 case MAT_NEW_NONZERO_LOCATIONS: 1468 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1469 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1470 case MAT_KEEP_NONZERO_PATTERN: 1471 case MAT_NEW_NONZERO_LOCATION_ERR: 1472 MatCheckPreallocated(A, 1); 1473 PetscCall(MatSetOption(a->A, op, flg)); 1474 PetscCall(MatSetOption(a->B, op, flg)); 1475 break; 1476 case MAT_ROW_ORIENTED: 1477 MatCheckPreallocated(A, 1); 1478 a->roworiented = flg; 1479 1480 PetscCall(MatSetOption(a->A, op, flg)); 1481 PetscCall(MatSetOption(a->B, op, flg)); 1482 break; 1483 case MAT_FORCE_DIAGONAL_ENTRIES: 1484 case MAT_SORTED_FULL: 1485 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1486 break; 1487 case MAT_IGNORE_OFF_PROC_ENTRIES: 1488 a->donotstash = flg; 1489 break; 1490 case MAT_USE_HASH_TABLE: 1491 a->ht_flag = flg; 1492 a->ht_fact = 1.39; 1493 break; 1494 case MAT_SYMMETRIC: 1495 case MAT_STRUCTURALLY_SYMMETRIC: 1496 case MAT_HERMITIAN: 1497 case MAT_SUBMAT_SINGLEIS: 1498 case MAT_SYMMETRY_ETERNAL: 1499 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1500 case MAT_SPD_ETERNAL: 1501 /* if the diagonal matrix is square it inherits some of the properties above */ 1502 break; 1503 default: 1504 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1505 } 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1512 Mat_SeqBAIJ *Aloc; 1513 Mat B; 1514 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1515 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1516 MatScalar *a; 1517 1518 PetscFunctionBegin; 1519 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1520 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1521 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1522 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1523 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1524 /* Do not know preallocation information, but must set block size */ 1525 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1526 } else { 1527 B = *matout; 1528 } 1529 1530 /* copy over the A part */ 1531 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1532 ai = Aloc->i; 1533 aj = Aloc->j; 1534 a = Aloc->a; 1535 PetscCall(PetscMalloc1(bs, &rvals)); 1536 1537 for (i = 0; i < mbs; i++) { 1538 rvals[0] = bs * (baij->rstartbs + i); 1539 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1540 for (j = ai[i]; j < ai[i + 1]; j++) { 1541 col = (baij->cstartbs + aj[j]) * bs; 1542 for (k = 0; k < bs; k++) { 1543 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1544 1545 col++; 1546 a += bs; 1547 } 1548 } 1549 } 1550 /* copy over the B part */ 1551 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1552 ai = Aloc->i; 1553 aj = Aloc->j; 1554 a = Aloc->a; 1555 for (i = 0; i < mbs; i++) { 1556 rvals[0] = bs * (baij->rstartbs + i); 1557 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1558 for (j = ai[i]; j < ai[i + 1]; j++) { 1559 col = baij->garray[aj[j]] * bs; 1560 for (k = 0; k < bs; k++) { 1561 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1562 col++; 1563 a += bs; 1564 } 1565 } 1566 } 1567 PetscCall(PetscFree(rvals)); 1568 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1569 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1570 1571 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1572 else PetscCall(MatHeaderMerge(A, &B)); 1573 PetscFunctionReturn(PETSC_SUCCESS); 1574 } 1575 1576 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1577 { 1578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1579 Mat a = baij->A, b = baij->B; 1580 PetscInt s1, s2, s3; 1581 1582 PetscFunctionBegin; 1583 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1584 if (rr) { 1585 PetscCall(VecGetLocalSize(rr, &s1)); 1586 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1587 /* Overlap communication with computation. */ 1588 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1589 } 1590 if (ll) { 1591 PetscCall(VecGetLocalSize(ll, &s1)); 1592 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1593 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1594 } 1595 /* scale the diagonal block */ 1596 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1597 1598 if (rr) { 1599 /* Do a scatter end and then right scale the off-diagonal block */ 1600 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1601 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1602 } 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1607 { 1608 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1609 PetscInt *lrows; 1610 PetscInt r, len; 1611 PetscBool cong; 1612 1613 PetscFunctionBegin; 1614 /* get locally owned rows */ 1615 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1616 /* fix right-hand side if needed */ 1617 if (x && b) { 1618 const PetscScalar *xx; 1619 PetscScalar *bb; 1620 1621 PetscCall(VecGetArrayRead(x, &xx)); 1622 PetscCall(VecGetArray(b, &bb)); 1623 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1624 PetscCall(VecRestoreArrayRead(x, &xx)); 1625 PetscCall(VecRestoreArray(b, &bb)); 1626 } 1627 1628 /* actually zap the local rows */ 1629 /* 1630 Zero the required rows. If the "diagonal block" of the matrix 1631 is square and the user wishes to set the diagonal we use separate 1632 code so that MatSetValues() is not called for each diagonal allocating 1633 new memory, thus calling lots of mallocs and slowing things down. 1634 1635 */ 1636 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1637 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1638 PetscCall(MatHasCongruentLayouts(A, &cong)); 1639 if ((diag != 0.0) && cong) { 1640 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1641 } else if (diag != 0.0) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1643 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1644 for (r = 0; r < len; ++r) { 1645 const PetscInt row = lrows[r] + A->rmap->rstart; 1646 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1647 } 1648 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1650 } else { 1651 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1652 } 1653 PetscCall(PetscFree(lrows)); 1654 1655 /* only change matrix nonzero state if pattern was allowed to be changed */ 1656 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1657 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1658 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1659 } 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1664 { 1665 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1666 PetscMPIInt n = A->rmap->n, p = 0; 1667 PetscInt i, j, k, r, len = 0, row, col, count; 1668 PetscInt *lrows, *owners = A->rmap->range; 1669 PetscSFNode *rrows; 1670 PetscSF sf; 1671 const PetscScalar *xx; 1672 PetscScalar *bb, *mask; 1673 Vec xmask, lmask; 1674 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1675 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1676 PetscScalar *aa; 1677 1678 PetscFunctionBegin; 1679 /* Create SF where leaves are input rows and roots are owned rows */ 1680 PetscCall(PetscMalloc1(n, &lrows)); 1681 for (r = 0; r < n; ++r) lrows[r] = -1; 1682 PetscCall(PetscMalloc1(N, &rrows)); 1683 for (r = 0; r < N; ++r) { 1684 const PetscInt idx = rows[r]; 1685 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1686 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1687 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1688 } 1689 rrows[r].rank = p; 1690 rrows[r].index = rows[r] - owners[p]; 1691 } 1692 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1693 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1694 /* Collect flags for rows to be zeroed */ 1695 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1696 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1697 PetscCall(PetscSFDestroy(&sf)); 1698 /* Compress and put in row numbers */ 1699 for (r = 0; r < n; ++r) 1700 if (lrows[r] >= 0) lrows[len++] = r; 1701 /* zero diagonal part of matrix */ 1702 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1703 /* handle off-diagonal part of matrix */ 1704 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1705 PetscCall(VecDuplicate(l->lvec, &lmask)); 1706 PetscCall(VecGetArray(xmask, &bb)); 1707 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1708 PetscCall(VecRestoreArray(xmask, &bb)); 1709 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1710 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecDestroy(&xmask)); 1712 if (x) { 1713 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1716 PetscCall(VecGetArray(b, &bb)); 1717 } 1718 PetscCall(VecGetArray(lmask, &mask)); 1719 /* remove zeroed rows of off-diagonal matrix */ 1720 for (i = 0; i < len; ++i) { 1721 row = lrows[i]; 1722 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1723 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1724 for (k = 0; k < count; ++k) { 1725 aa[0] = 0.0; 1726 aa += bs; 1727 } 1728 } 1729 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1730 for (i = 0; i < l->B->rmap->N; ++i) { 1731 row = i / bs; 1732 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1733 for (k = 0; k < bs; ++k) { 1734 col = bs * baij->j[j] + k; 1735 if (PetscAbsScalar(mask[col])) { 1736 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1737 if (x) bb[i] -= aa[0] * xx[col]; 1738 aa[0] = 0.0; 1739 } 1740 } 1741 } 1742 } 1743 if (x) { 1744 PetscCall(VecRestoreArray(b, &bb)); 1745 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1746 } 1747 PetscCall(VecRestoreArray(lmask, &mask)); 1748 PetscCall(VecDestroy(&lmask)); 1749 PetscCall(PetscFree(lrows)); 1750 1751 /* only change matrix nonzero state if pattern was allowed to be changed */ 1752 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1753 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1754 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1755 } 1756 PetscFunctionReturn(PETSC_SUCCESS); 1757 } 1758 1759 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1760 { 1761 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1762 1763 PetscFunctionBegin; 1764 PetscCall(MatSetUnfactored(a->A)); 1765 PetscFunctionReturn(PETSC_SUCCESS); 1766 } 1767 1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1769 1770 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1771 { 1772 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1773 Mat a, b, c, d; 1774 PetscBool flg; 1775 1776 PetscFunctionBegin; 1777 a = matA->A; 1778 b = matA->B; 1779 c = matB->A; 1780 d = matB->B; 1781 1782 PetscCall(MatEqual(a, c, &flg)); 1783 if (flg) PetscCall(MatEqual(b, d, &flg)); 1784 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1789 { 1790 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1791 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1792 1793 PetscFunctionBegin; 1794 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1795 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1796 PetscCall(MatCopy_Basic(A, B, str)); 1797 } else { 1798 PetscCall(MatCopy(a->A, b->A, str)); 1799 PetscCall(MatCopy(a->B, b->B, str)); 1800 } 1801 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1806 { 1807 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1808 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1809 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1810 1811 PetscFunctionBegin; 1812 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1817 { 1818 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1819 PetscBLASInt bnz, one = 1; 1820 Mat_SeqBAIJ *x, *y; 1821 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1822 1823 PetscFunctionBegin; 1824 if (str == SAME_NONZERO_PATTERN) { 1825 PetscScalar alpha = a; 1826 x = (Mat_SeqBAIJ *)xx->A->data; 1827 y = (Mat_SeqBAIJ *)yy->A->data; 1828 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1829 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1830 x = (Mat_SeqBAIJ *)xx->B->data; 1831 y = (Mat_SeqBAIJ *)yy->B->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1835 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1836 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1837 } else { 1838 Mat B; 1839 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1840 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1841 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1842 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1843 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1844 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1845 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1846 PetscCall(MatSetType(B, MATMPIBAIJ)); 1847 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1848 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1849 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1850 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1851 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1852 PetscCall(MatHeaderMerge(Y, &B)); 1853 PetscCall(PetscFree(nnz_d)); 1854 PetscCall(PetscFree(nnz_o)); 1855 } 1856 PetscFunctionReturn(PETSC_SUCCESS); 1857 } 1858 1859 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1860 { 1861 PetscFunctionBegin; 1862 if (PetscDefined(USE_COMPLEX)) { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1864 1865 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1866 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1867 } 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 1871 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1872 { 1873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1874 1875 PetscFunctionBegin; 1876 PetscCall(MatRealPart(a->A)); 1877 PetscCall(MatRealPart(a->B)); 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1882 { 1883 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1884 1885 PetscFunctionBegin; 1886 PetscCall(MatImaginaryPart(a->A)); 1887 PetscCall(MatImaginaryPart(a->B)); 1888 PetscFunctionReturn(PETSC_SUCCESS); 1889 } 1890 1891 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1892 { 1893 IS iscol_local; 1894 PetscInt csize; 1895 1896 PetscFunctionBegin; 1897 PetscCall(ISGetLocalSize(iscol, &csize)); 1898 if (call == MAT_REUSE_MATRIX) { 1899 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1900 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1901 } else { 1902 PetscCall(ISAllGather(iscol, &iscol_local)); 1903 } 1904 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1905 if (call == MAT_INITIAL_MATRIX) { 1906 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1907 PetscCall(ISDestroy(&iscol_local)); 1908 } 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 /* 1913 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1914 in local and then by concatenating the local matrices the end result. 1915 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1916 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1917 */ 1918 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1919 { 1920 PetscMPIInt rank, size; 1921 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1922 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1923 Mat M, Mreuse; 1924 MatScalar *vwork, *aa; 1925 MPI_Comm comm; 1926 IS isrow_new, iscol_new; 1927 Mat_SeqBAIJ *aij; 1928 1929 PetscFunctionBegin; 1930 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1931 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1932 PetscCallMPI(MPI_Comm_size(comm, &size)); 1933 /* The compression and expansion should be avoided. Doesn't point 1934 out errors, might change the indices, hence buggey */ 1935 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1936 if (isrow == iscol) { 1937 iscol_new = isrow_new; 1938 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1939 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1940 1941 if (call == MAT_REUSE_MATRIX) { 1942 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1943 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1944 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1945 } else { 1946 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1947 } 1948 PetscCall(ISDestroy(&isrow_new)); 1949 PetscCall(ISDestroy(&iscol_new)); 1950 /* 1951 m - number of local rows 1952 n - number of columns (same on all processors) 1953 rstart - first row in new global matrix generated 1954 */ 1955 PetscCall(MatGetBlockSize(mat, &bs)); 1956 PetscCall(MatGetSize(Mreuse, &m, &n)); 1957 m = m / bs; 1958 n = n / bs; 1959 1960 if (call == MAT_INITIAL_MATRIX) { 1961 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1962 ii = aij->i; 1963 jj = aij->j; 1964 1965 /* 1966 Determine the number of non-zeros in the diagonal and off-diagonal 1967 portions of the matrix in order to do correct preallocation 1968 */ 1969 1970 /* first get start and end of "diagonal" columns */ 1971 if (csize == PETSC_DECIDE) { 1972 PetscCall(ISGetSize(isrow, &mglobal)); 1973 if (mglobal == n * bs) { /* square matrix */ 1974 nlocal = m; 1975 } else { 1976 nlocal = n / size + ((n % size) > rank); 1977 } 1978 } else { 1979 nlocal = csize / bs; 1980 } 1981 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1982 rstart = rend - nlocal; 1983 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1984 1985 /* next, compute all the lengths */ 1986 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1987 for (i = 0; i < m; i++) { 1988 jend = ii[i + 1] - ii[i]; 1989 olen = 0; 1990 dlen = 0; 1991 for (j = 0; j < jend; j++) { 1992 if (*jj < rstart || *jj >= rend) olen++; 1993 else dlen++; 1994 jj++; 1995 } 1996 olens[i] = olen; 1997 dlens[i] = dlen; 1998 } 1999 PetscCall(MatCreate(comm, &M)); 2000 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2001 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2002 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2003 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2004 PetscCall(PetscFree2(dlens, olens)); 2005 } else { 2006 PetscInt ml, nl; 2007 2008 M = *newmat; 2009 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2010 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2011 PetscCall(MatZeroEntries(M)); 2012 /* 2013 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2014 rather than the slower MatSetValues(). 2015 */ 2016 M->was_assembled = PETSC_TRUE; 2017 M->assembled = PETSC_FALSE; 2018 } 2019 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2020 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2021 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2022 ii = aij->i; 2023 jj = aij->j; 2024 aa = aij->a; 2025 for (i = 0; i < m; i++) { 2026 row = rstart / bs + i; 2027 nz = ii[i + 1] - ii[i]; 2028 cwork = jj; 2029 jj = PetscSafePointerPlusOffset(jj, nz); 2030 vwork = aa; 2031 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2032 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2033 } 2034 2035 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2036 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2037 *newmat = M; 2038 2039 /* save submatrix used in processor for next request */ 2040 if (call == MAT_INITIAL_MATRIX) { 2041 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2042 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2043 } 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2048 { 2049 MPI_Comm comm, pcomm; 2050 PetscInt clocal_size, nrows; 2051 const PetscInt *rows; 2052 PetscMPIInt size; 2053 IS crowp, lcolp; 2054 2055 PetscFunctionBegin; 2056 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2057 /* make a collective version of 'rowp' */ 2058 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2059 if (pcomm == comm) { 2060 crowp = rowp; 2061 } else { 2062 PetscCall(ISGetSize(rowp, &nrows)); 2063 PetscCall(ISGetIndices(rowp, &rows)); 2064 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2065 PetscCall(ISRestoreIndices(rowp, &rows)); 2066 } 2067 PetscCall(ISSetPermutation(crowp)); 2068 /* make a local version of 'colp' */ 2069 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2070 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2071 if (size == 1) { 2072 lcolp = colp; 2073 } else { 2074 PetscCall(ISAllGather(colp, &lcolp)); 2075 } 2076 PetscCall(ISSetPermutation(lcolp)); 2077 /* now we just get the submatrix */ 2078 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2079 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2080 /* clean up */ 2081 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2082 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2087 { 2088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2089 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2090 2091 PetscFunctionBegin; 2092 if (nghosts) *nghosts = B->nbs; 2093 if (ghosts) *ghosts = baij->garray; 2094 PetscFunctionReturn(PETSC_SUCCESS); 2095 } 2096 2097 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2098 { 2099 Mat B; 2100 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2101 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2102 Mat_SeqAIJ *b; 2103 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2104 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2105 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2106 2107 PetscFunctionBegin; 2108 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2109 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2110 2111 /* Tell every processor the number of nonzeros per row */ 2112 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2113 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2114 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2115 displs = recvcounts + size; 2116 for (i = 0; i < size; i++) { 2117 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2118 displs[i] = A->rmap->range[i] / bs; 2119 } 2120 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2121 /* Create the sequential matrix of the same type as the local block diagonal */ 2122 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2123 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2124 PetscCall(MatSetType(B, MATSEQAIJ)); 2125 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2126 b = (Mat_SeqAIJ *)B->data; 2127 2128 /* Copy my part of matrix column indices over */ 2129 sendcount = ad->nz + bd->nz; 2130 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2131 a_jsendbuf = ad->j; 2132 b_jsendbuf = bd->j; 2133 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2134 cnt = 0; 2135 for (i = 0; i < n; i++) { 2136 /* put in lower diagonal portion */ 2137 m = bd->i[i + 1] - bd->i[i]; 2138 while (m > 0) { 2139 /* is it above diagonal (in bd (compressed) numbering) */ 2140 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2141 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2142 m--; 2143 } 2144 2145 /* put in diagonal portion */ 2146 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2147 2148 /* put in upper diagonal portion */ 2149 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 } 2151 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2152 2153 /* Gather all column indices to all processors */ 2154 for (i = 0; i < size; i++) { 2155 recvcounts[i] = 0; 2156 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2157 } 2158 displs[0] = 0; 2159 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2160 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2161 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2162 /* set the b->ilen (length of each row) values */ 2163 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2164 /* set the b->i indices */ 2165 b->i[0] = 0; 2166 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2167 PetscCall(PetscFree(lens)); 2168 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2169 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2170 PetscCall(PetscFree(recvcounts)); 2171 2172 PetscCall(MatPropagateSymmetryOptions(A, B)); 2173 *newmat = B; 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2178 { 2179 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2180 Vec bb1 = NULL; 2181 2182 PetscFunctionBegin; 2183 if (flag == SOR_APPLY_UPPER) { 2184 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2189 2190 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2191 if (flag & SOR_ZERO_INITIAL_GUESS) { 2192 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2193 its--; 2194 } 2195 2196 while (its--) { 2197 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2198 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2199 2200 /* update rhs: bb1 = bb - B*x */ 2201 PetscCall(VecScale(mat->lvec, -1.0)); 2202 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2203 2204 /* local sweep */ 2205 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2206 } 2207 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2208 if (flag & SOR_ZERO_INITIAL_GUESS) { 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2210 its--; 2211 } 2212 while (its--) { 2213 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2214 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 /* update rhs: bb1 = bb - B*x */ 2217 PetscCall(VecScale(mat->lvec, -1.0)); 2218 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2219 2220 /* local sweep */ 2221 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2222 } 2223 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2224 if (flag & SOR_ZERO_INITIAL_GUESS) { 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2226 its--; 2227 } 2228 while (its--) { 2229 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2230 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2231 2232 /* update rhs: bb1 = bb - B*x */ 2233 PetscCall(VecScale(mat->lvec, -1.0)); 2234 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2235 2236 /* local sweep */ 2237 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2238 } 2239 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2240 2241 PetscCall(VecDestroy(&bb1)); 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244 2245 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2246 { 2247 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2248 PetscInt m, N, i, *garray = aij->garray; 2249 PetscInt ib, jb, bs = A->rmap->bs; 2250 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2251 MatScalar *a_val = a_aij->a; 2252 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2253 MatScalar *b_val = b_aij->a; 2254 PetscReal *work; 2255 2256 PetscFunctionBegin; 2257 PetscCall(MatGetSize(A, &m, &N)); 2258 PetscCall(PetscCalloc1(N, &work)); 2259 if (type == NORM_2) { 2260 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2261 for (jb = 0; jb < bs; jb++) { 2262 for (ib = 0; ib < bs; ib++) { 2263 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2264 a_val++; 2265 } 2266 } 2267 } 2268 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2269 for (jb = 0; jb < bs; jb++) { 2270 for (ib = 0; ib < bs; ib++) { 2271 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2272 b_val++; 2273 } 2274 } 2275 } 2276 } else if (type == NORM_1) { 2277 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2278 for (jb = 0; jb < bs; jb++) { 2279 for (ib = 0; ib < bs; ib++) { 2280 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2281 a_val++; 2282 } 2283 } 2284 } 2285 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2286 for (jb = 0; jb < bs; jb++) { 2287 for (ib = 0; ib < bs; ib++) { 2288 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2289 b_val++; 2290 } 2291 } 2292 } 2293 } else if (type == NORM_INFINITY) { 2294 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2295 for (jb = 0; jb < bs; jb++) { 2296 for (ib = 0; ib < bs; ib++) { 2297 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2298 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2299 a_val++; 2300 } 2301 } 2302 } 2303 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2304 for (jb = 0; jb < bs; jb++) { 2305 for (ib = 0; ib < bs; ib++) { 2306 int col = garray[b_aij->j[i]] * bs + jb; 2307 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2313 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2314 for (jb = 0; jb < bs; jb++) { 2315 for (ib = 0; ib < bs; ib++) { 2316 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2317 a_val++; 2318 } 2319 } 2320 } 2321 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2322 for (jb = 0; jb < bs; jb++) { 2323 for (ib = 0; ib < bs; ib++) { 2324 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2325 b_val++; 2326 } 2327 } 2328 } 2329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2330 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2331 for (jb = 0; jb < bs; jb++) { 2332 for (ib = 0; ib < bs; ib++) { 2333 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2334 a_val++; 2335 } 2336 } 2337 } 2338 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2339 for (jb = 0; jb < bs; jb++) { 2340 for (ib = 0; ib < bs; ib++) { 2341 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2342 b_val++; 2343 } 2344 } 2345 } 2346 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2347 if (type == NORM_INFINITY) { 2348 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2349 } else { 2350 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2351 } 2352 PetscCall(PetscFree(work)); 2353 if (type == NORM_2) { 2354 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2355 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2356 for (i = 0; i < N; i++) reductions[i] /= m; 2357 } 2358 PetscFunctionReturn(PETSC_SUCCESS); 2359 } 2360 2361 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2362 { 2363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2364 2365 PetscFunctionBegin; 2366 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2367 A->factorerrortype = a->A->factorerrortype; 2368 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2369 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2374 { 2375 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2376 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2377 2378 PetscFunctionBegin; 2379 if (!Y->preallocated) { 2380 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2381 } else if (!aij->nz) { 2382 PetscInt nonew = aij->nonew; 2383 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2384 aij->nonew = nonew; 2385 } 2386 PetscCall(MatShift_Basic(Y, a)); 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2391 { 2392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2393 2394 PetscFunctionBegin; 2395 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2396 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2397 if (d) { 2398 PetscInt rstart; 2399 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2400 *d += rstart / A->rmap->bs; 2401 } 2402 PetscFunctionReturn(PETSC_SUCCESS); 2403 } 2404 2405 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2406 { 2407 PetscFunctionBegin; 2408 *a = ((Mat_MPIBAIJ *)A->data)->A; 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2413 { 2414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2415 2416 PetscFunctionBegin; 2417 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2418 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2423 MatGetRow_MPIBAIJ, 2424 MatRestoreRow_MPIBAIJ, 2425 MatMult_MPIBAIJ, 2426 /* 4*/ MatMultAdd_MPIBAIJ, 2427 MatMultTranspose_MPIBAIJ, 2428 MatMultTransposeAdd_MPIBAIJ, 2429 NULL, 2430 NULL, 2431 NULL, 2432 /*10*/ NULL, 2433 NULL, 2434 NULL, 2435 MatSOR_MPIBAIJ, 2436 MatTranspose_MPIBAIJ, 2437 /*15*/ MatGetInfo_MPIBAIJ, 2438 MatEqual_MPIBAIJ, 2439 MatGetDiagonal_MPIBAIJ, 2440 MatDiagonalScale_MPIBAIJ, 2441 MatNorm_MPIBAIJ, 2442 /*20*/ MatAssemblyBegin_MPIBAIJ, 2443 MatAssemblyEnd_MPIBAIJ, 2444 MatSetOption_MPIBAIJ, 2445 MatZeroEntries_MPIBAIJ, 2446 /*24*/ MatZeroRows_MPIBAIJ, 2447 NULL, 2448 NULL, 2449 NULL, 2450 NULL, 2451 /*29*/ MatSetUp_MPI_Hash, 2452 NULL, 2453 NULL, 2454 MatGetDiagonalBlock_MPIBAIJ, 2455 NULL, 2456 /*34*/ MatDuplicate_MPIBAIJ, 2457 NULL, 2458 NULL, 2459 NULL, 2460 NULL, 2461 /*39*/ MatAXPY_MPIBAIJ, 2462 MatCreateSubMatrices_MPIBAIJ, 2463 MatIncreaseOverlap_MPIBAIJ, 2464 MatGetValues_MPIBAIJ, 2465 MatCopy_MPIBAIJ, 2466 /*44*/ NULL, 2467 MatScale_MPIBAIJ, 2468 MatShift_MPIBAIJ, 2469 NULL, 2470 MatZeroRowsColumns_MPIBAIJ, 2471 /*49*/ NULL, 2472 NULL, 2473 NULL, 2474 NULL, 2475 NULL, 2476 /*54*/ MatFDColoringCreate_MPIXAIJ, 2477 NULL, 2478 MatSetUnfactored_MPIBAIJ, 2479 MatPermute_MPIBAIJ, 2480 MatSetValuesBlocked_MPIBAIJ, 2481 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2482 MatDestroy_MPIBAIJ, 2483 MatView_MPIBAIJ, 2484 NULL, 2485 NULL, 2486 /*64*/ NULL, 2487 NULL, 2488 NULL, 2489 NULL, 2490 NULL, 2491 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2492 NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*74*/ NULL, 2497 MatFDColoringApply_BAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*79*/ NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 MatLoad_MPIBAIJ, 2506 /*84*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 /*89*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*94*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*99*/ NULL, 2522 NULL, 2523 NULL, 2524 MatConjugate_MPIBAIJ, 2525 NULL, 2526 /*104*/ NULL, 2527 MatRealPart_MPIBAIJ, 2528 MatImaginaryPart_MPIBAIJ, 2529 NULL, 2530 NULL, 2531 /*109*/ NULL, 2532 NULL, 2533 NULL, 2534 NULL, 2535 MatMissingDiagonal_MPIBAIJ, 2536 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2537 NULL, 2538 MatGetGhosts_MPIBAIJ, 2539 NULL, 2540 NULL, 2541 /*119*/ NULL, 2542 NULL, 2543 NULL, 2544 NULL, 2545 MatGetMultiProcBlock_MPIBAIJ, 2546 /*124*/ NULL, 2547 MatGetColumnReductions_MPIBAIJ, 2548 MatInvertBlockDiagonal_MPIBAIJ, 2549 NULL, 2550 NULL, 2551 /*129*/ NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 /*134*/ NULL, 2557 NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 /*139*/ MatSetBlockSizes_Default, 2562 NULL, 2563 NULL, 2564 MatFDColoringSetUp_MPIXAIJ, 2565 NULL, 2566 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2567 NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*150*/ NULL, 2573 MatEliminateZeros_MPIBAIJ, 2574 MatGetRowSumAbs_MPIBAIJ}; 2575 2576 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2577 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2578 2579 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2580 { 2581 PetscInt m, rstart, cstart, cend; 2582 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2583 const PetscInt *JJ = NULL; 2584 PetscScalar *values = NULL; 2585 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2586 PetscBool nooffprocentries; 2587 2588 PetscFunctionBegin; 2589 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2590 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2591 PetscCall(PetscLayoutSetUp(B->rmap)); 2592 PetscCall(PetscLayoutSetUp(B->cmap)); 2593 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2594 m = B->rmap->n / bs; 2595 rstart = B->rmap->rstart / bs; 2596 cstart = B->cmap->rstart / bs; 2597 cend = B->cmap->rend / bs; 2598 2599 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2600 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2601 for (i = 0; i < m; i++) { 2602 nz = ii[i + 1] - ii[i]; 2603 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2604 nz_max = PetscMax(nz_max, nz); 2605 dlen = 0; 2606 olen = 0; 2607 JJ = jj + ii[i]; 2608 for (j = 0; j < nz; j++) { 2609 if (*JJ < cstart || *JJ >= cend) olen++; 2610 else dlen++; 2611 JJ++; 2612 } 2613 d_nnz[i] = dlen; 2614 o_nnz[i] = olen; 2615 } 2616 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2617 PetscCall(PetscFree2(d_nnz, o_nnz)); 2618 2619 values = (PetscScalar *)V; 2620 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2621 for (i = 0; i < m; i++) { 2622 PetscInt row = i + rstart; 2623 PetscInt ncols = ii[i + 1] - ii[i]; 2624 const PetscInt *icols = jj + ii[i]; 2625 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2626 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2627 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2628 } else { /* block ordering does not match so we can only insert one block at a time. */ 2629 PetscInt j; 2630 for (j = 0; j < ncols; j++) { 2631 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2632 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2633 } 2634 } 2635 } 2636 2637 if (!V) PetscCall(PetscFree(values)); 2638 nooffprocentries = B->nooffprocentries; 2639 B->nooffprocentries = PETSC_TRUE; 2640 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2641 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2642 B->nooffprocentries = nooffprocentries; 2643 2644 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2645 PetscFunctionReturn(PETSC_SUCCESS); 2646 } 2647 2648 /*@C 2649 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2650 2651 Collective 2652 2653 Input Parameters: 2654 + B - the matrix 2655 . bs - the block size 2656 . i - the indices into `j` for the start of each local row (starts with zero) 2657 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2658 - v - optional values in the matrix, use `NULL` if not provided 2659 2660 Level: advanced 2661 2662 Notes: 2663 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2664 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2665 called this routine. 2666 2667 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2668 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2669 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2670 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2671 block column and the second index is over columns within a block. 2672 2673 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2674 2675 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2676 @*/ 2677 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2678 { 2679 PetscFunctionBegin; 2680 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2681 PetscValidType(B, 1); 2682 PetscValidLogicalCollectiveInt(B, bs, 2); 2683 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2684 PetscFunctionReturn(PETSC_SUCCESS); 2685 } 2686 2687 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2688 { 2689 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2690 PetscInt i; 2691 PetscMPIInt size; 2692 2693 PetscFunctionBegin; 2694 if (B->hash_active) { 2695 B->ops[0] = b->cops; 2696 B->hash_active = PETSC_FALSE; 2697 } 2698 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2699 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2700 PetscCall(PetscLayoutSetUp(B->rmap)); 2701 PetscCall(PetscLayoutSetUp(B->cmap)); 2702 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2703 2704 if (d_nnz) { 2705 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2706 } 2707 if (o_nnz) { 2708 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2709 } 2710 2711 b->bs2 = bs * bs; 2712 b->mbs = B->rmap->n / bs; 2713 b->nbs = B->cmap->n / bs; 2714 b->Mbs = B->rmap->N / bs; 2715 b->Nbs = B->cmap->N / bs; 2716 2717 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2718 b->rstartbs = B->rmap->rstart / bs; 2719 b->rendbs = B->rmap->rend / bs; 2720 b->cstartbs = B->cmap->rstart / bs; 2721 b->cendbs = B->cmap->rend / bs; 2722 2723 #if defined(PETSC_USE_CTABLE) 2724 PetscCall(PetscHMapIDestroy(&b->colmap)); 2725 #else 2726 PetscCall(PetscFree(b->colmap)); 2727 #endif 2728 PetscCall(PetscFree(b->garray)); 2729 PetscCall(VecDestroy(&b->lvec)); 2730 PetscCall(VecScatterDestroy(&b->Mvctx)); 2731 2732 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2733 2734 MatSeqXAIJGetOptions_Private(b->B); 2735 PetscCall(MatDestroy(&b->B)); 2736 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2737 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2738 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2739 MatSeqXAIJRestoreOptions_Private(b->B); 2740 2741 MatSeqXAIJGetOptions_Private(b->A); 2742 PetscCall(MatDestroy(&b->A)); 2743 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2744 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2745 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2746 MatSeqXAIJRestoreOptions_Private(b->A); 2747 2748 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2749 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2750 B->preallocated = PETSC_TRUE; 2751 B->was_assembled = PETSC_FALSE; 2752 B->assembled = PETSC_FALSE; 2753 PetscFunctionReturn(PETSC_SUCCESS); 2754 } 2755 2756 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2757 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2758 2759 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2760 { 2761 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2762 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2763 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2764 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2765 2766 PetscFunctionBegin; 2767 PetscCall(PetscMalloc1(M + 1, &ii)); 2768 ii[0] = 0; 2769 for (i = 0; i < M; i++) { 2770 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2771 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2772 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2773 /* remove one from count of matrix has diagonal */ 2774 for (j = id[i]; j < id[i + 1]; j++) { 2775 if (jd[j] == i) { 2776 ii[i + 1]--; 2777 break; 2778 } 2779 } 2780 } 2781 PetscCall(PetscMalloc1(ii[M], &jj)); 2782 cnt = 0; 2783 for (i = 0; i < M; i++) { 2784 for (j = io[i]; j < io[i + 1]; j++) { 2785 if (garray[jo[j]] > rstart) break; 2786 jj[cnt++] = garray[jo[j]]; 2787 } 2788 for (k = id[i]; k < id[i + 1]; k++) { 2789 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2790 } 2791 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2792 } 2793 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2794 PetscFunctionReturn(PETSC_SUCCESS); 2795 } 2796 2797 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2798 2799 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2800 2801 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2802 { 2803 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2804 Mat_MPIAIJ *b; 2805 Mat B; 2806 2807 PetscFunctionBegin; 2808 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2809 2810 if (reuse == MAT_REUSE_MATRIX) { 2811 B = *newmat; 2812 } else { 2813 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2814 PetscCall(MatSetType(B, MATMPIAIJ)); 2815 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2816 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2817 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2818 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2819 } 2820 b = (Mat_MPIAIJ *)B->data; 2821 2822 if (reuse == MAT_REUSE_MATRIX) { 2823 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2824 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2825 } else { 2826 PetscInt *garray = a->garray; 2827 Mat_SeqAIJ *bB; 2828 PetscInt bs, nnz; 2829 PetscCall(MatDestroy(&b->A)); 2830 PetscCall(MatDestroy(&b->B)); 2831 /* just clear out the data structure */ 2832 PetscCall(MatDisAssemble_MPIAIJ(B)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2835 2836 /* Global numbering for b->B columns */ 2837 bB = (Mat_SeqAIJ *)b->B->data; 2838 bs = A->rmap->bs; 2839 nnz = bB->i[A->rmap->n]; 2840 for (PetscInt k = 0; k < nnz; k++) { 2841 PetscInt bj = bB->j[k] / bs; 2842 PetscInt br = bB->j[k] % bs; 2843 bB->j[k] = garray[bj] * bs + br; 2844 } 2845 } 2846 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2847 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2848 2849 if (reuse == MAT_INPLACE_MATRIX) { 2850 PetscCall(MatHeaderReplace(A, &B)); 2851 } else { 2852 *newmat = B; 2853 } 2854 PetscFunctionReturn(PETSC_SUCCESS); 2855 } 2856 2857 /*MC 2858 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2859 2860 Options Database Keys: 2861 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2862 . -mat_block_size <bs> - set the blocksize used to store the matrix 2863 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2864 - -mat_use_hash_table <fact> - set hash table factor 2865 2866 Level: beginner 2867 2868 Note: 2869 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2870 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2871 2872 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2873 M*/ 2874 2875 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2876 2877 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2878 { 2879 Mat_MPIBAIJ *b; 2880 PetscBool flg = PETSC_FALSE; 2881 2882 PetscFunctionBegin; 2883 PetscCall(PetscNew(&b)); 2884 B->data = (void *)b; 2885 B->ops[0] = MatOps_Values; 2886 B->assembled = PETSC_FALSE; 2887 2888 B->insertmode = NOT_SET_VALUES; 2889 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2890 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2891 2892 /* build local table of row and column ownerships */ 2893 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2894 2895 /* build cache for off array entries formed */ 2896 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2897 2898 b->donotstash = PETSC_FALSE; 2899 b->colmap = NULL; 2900 b->garray = NULL; 2901 b->roworiented = PETSC_TRUE; 2902 2903 /* stuff used in block assembly */ 2904 b->barray = NULL; 2905 2906 /* stuff used for matrix vector multiply */ 2907 b->lvec = NULL; 2908 b->Mvctx = NULL; 2909 2910 /* stuff for MatGetRow() */ 2911 b->rowindices = NULL; 2912 b->rowvalues = NULL; 2913 b->getrowactive = PETSC_FALSE; 2914 2915 /* hash table stuff */ 2916 b->ht = NULL; 2917 b->hd = NULL; 2918 b->ht_size = 0; 2919 b->ht_flag = PETSC_FALSE; 2920 b->ht_fact = 0; 2921 b->ht_total_ct = 0; 2922 b->ht_insert_ct = 0; 2923 2924 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2925 b->ijonly = PETSC_FALSE; 2926 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2930 #if defined(PETSC_HAVE_HYPRE) 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2932 #endif 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2940 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2941 2942 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2943 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2944 if (flg) { 2945 PetscReal fact = 1.39; 2946 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2947 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2948 if (fact <= 1.0) fact = 1.39; 2949 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2950 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2951 } 2952 PetscOptionsEnd(); 2953 PetscFunctionReturn(PETSC_SUCCESS); 2954 } 2955 2956 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2957 /*MC 2958 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2959 2960 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2961 and `MATMPIBAIJ` otherwise. 2962 2963 Options Database Keys: 2964 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2965 2966 Level: beginner 2967 2968 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2969 M*/ 2970 2971 /*@C 2972 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2973 (block compressed row). 2974 2975 Collective 2976 2977 Input Parameters: 2978 + B - the matrix 2979 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2980 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2981 . d_nz - number of block nonzeros per block row in diagonal portion of local 2982 submatrix (same for all local rows) 2983 . d_nnz - array containing the number of block nonzeros in the various block rows 2984 of the in diagonal portion of the local (possibly different for each block 2985 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2986 set it even if it is zero. 2987 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2988 submatrix (same for all local rows). 2989 - o_nnz - array containing the number of nonzeros in the various block rows of the 2990 off-diagonal portion of the local submatrix (possibly different for 2991 each block row) or `NULL`. 2992 2993 If the *_nnz parameter is given then the *_nz parameter is ignored 2994 2995 Options Database Keys: 2996 + -mat_block_size - size of the blocks to use 2997 - -mat_use_hash_table <fact> - set hash table factor 2998 2999 Level: intermediate 3000 3001 Notes: 3002 For good matrix assembly performance 3003 the user should preallocate the matrix storage by setting the parameters 3004 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3005 performance can be increased by more than a factor of 50. 3006 3007 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3008 than it must be used on all processors that share the object for that argument. 3009 3010 Storage Information: 3011 For a square global matrix we define each processor's diagonal portion 3012 to be its local rows and the corresponding columns (a square submatrix); 3013 each processor's off-diagonal portion encompasses the remainder of the 3014 local matrix (a rectangular submatrix). 3015 3016 The user can specify preallocated storage for the diagonal part of 3017 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3018 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3019 memory allocation. Likewise, specify preallocated storage for the 3020 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3021 3022 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3023 the figure below we depict these three local rows and all columns (0-11). 3024 3025 .vb 3026 0 1 2 3 4 5 6 7 8 9 10 11 3027 -------------------------- 3028 row 3 |o o o d d d o o o o o o 3029 row 4 |o o o d d d o o o o o o 3030 row 5 |o o o d d d o o o o o o 3031 -------------------------- 3032 .ve 3033 3034 Thus, any entries in the d locations are stored in the d (diagonal) 3035 submatrix, and any entries in the o locations are stored in the 3036 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3037 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3038 3039 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3040 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3041 In general, for PDE problems in which most nonzeros are near the diagonal, 3042 one expects `d_nz` >> `o_nz`. 3043 3044 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3045 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3046 You can also run with the option `-info` and look for messages with the string 3047 malloc in them to see if additional memory allocation was needed. 3048 3049 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3050 @*/ 3051 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3052 { 3053 PetscFunctionBegin; 3054 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3055 PetscValidType(B, 1); 3056 PetscValidLogicalCollectiveInt(B, bs, 2); 3057 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3058 PetscFunctionReturn(PETSC_SUCCESS); 3059 } 3060 3061 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3062 /*@C 3063 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3064 (block compressed row). 3065 3066 Collective 3067 3068 Input Parameters: 3069 + comm - MPI communicator 3070 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3071 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3072 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3073 This value should be the same as the local size used in creating the 3074 y vector for the matrix-vector product y = Ax. 3075 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3076 This value should be the same as the local size used in creating the 3077 x vector for the matrix-vector product y = Ax. 3078 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3079 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3080 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3081 submatrix (same for all local rows) 3082 . d_nnz - array containing the number of nonzero blocks in the various block rows 3083 of the in diagonal portion of the local (possibly different for each block 3084 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3085 and set it even if it is zero. 3086 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3087 submatrix (same for all local rows). 3088 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3089 off-diagonal portion of the local submatrix (possibly different for 3090 each block row) or NULL. 3091 3092 Output Parameter: 3093 . A - the matrix 3094 3095 Options Database Keys: 3096 + -mat_block_size - size of the blocks to use 3097 - -mat_use_hash_table <fact> - set hash table factor 3098 3099 Level: intermediate 3100 3101 Notes: 3102 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3103 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3104 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3105 3106 For good matrix assembly performance 3107 the user should preallocate the matrix storage by setting the parameters 3108 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3109 performance can be increased by more than a factor of 50. 3110 3111 If the *_nnz parameter is given then the *_nz parameter is ignored 3112 3113 A nonzero block is any block that as 1 or more nonzeros in it 3114 3115 The user MUST specify either the local or global matrix dimensions 3116 (possibly both). 3117 3118 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3119 than it must be used on all processors that share the object for that argument. 3120 3121 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3122 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3123 3124 Storage Information: 3125 For a square global matrix we define each processor's diagonal portion 3126 to be its local rows and the corresponding columns (a square submatrix); 3127 each processor's off-diagonal portion encompasses the remainder of the 3128 local matrix (a rectangular submatrix). 3129 3130 The user can specify preallocated storage for the diagonal part of 3131 the local submatrix with either d_nz or d_nnz (not both). Set 3132 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3133 memory allocation. Likewise, specify preallocated storage for the 3134 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3135 3136 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3137 the figure below we depict these three local rows and all columns (0-11). 3138 3139 .vb 3140 0 1 2 3 4 5 6 7 8 9 10 11 3141 -------------------------- 3142 row 3 |o o o d d d o o o o o o 3143 row 4 |o o o d d d o o o o o o 3144 row 5 |o o o d d d o o o o o o 3145 -------------------------- 3146 .ve 3147 3148 Thus, any entries in the d locations are stored in the d (diagonal) 3149 submatrix, and any entries in the o locations are stored in the 3150 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3151 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3152 3153 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3154 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3155 In general, for PDE problems in which most nonzeros are near the diagonal, 3156 one expects `d_nz` >> `o_nz`. 3157 3158 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3159 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3160 @*/ 3161 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3162 { 3163 PetscMPIInt size; 3164 3165 PetscFunctionBegin; 3166 PetscCall(MatCreate(comm, A)); 3167 PetscCall(MatSetSizes(*A, m, n, M, N)); 3168 PetscCallMPI(MPI_Comm_size(comm, &size)); 3169 if (size > 1) { 3170 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3171 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3172 } else { 3173 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3174 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3175 } 3176 PetscFunctionReturn(PETSC_SUCCESS); 3177 } 3178 3179 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3180 { 3181 Mat mat; 3182 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3183 PetscInt len = 0; 3184 3185 PetscFunctionBegin; 3186 *newmat = NULL; 3187 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3188 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3189 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3190 3191 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3192 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3193 if (matin->hash_active) { 3194 PetscCall(MatSetUp(mat)); 3195 } else { 3196 mat->factortype = matin->factortype; 3197 mat->preallocated = PETSC_TRUE; 3198 mat->assembled = PETSC_TRUE; 3199 mat->insertmode = NOT_SET_VALUES; 3200 3201 a = (Mat_MPIBAIJ *)mat->data; 3202 mat->rmap->bs = matin->rmap->bs; 3203 a->bs2 = oldmat->bs2; 3204 a->mbs = oldmat->mbs; 3205 a->nbs = oldmat->nbs; 3206 a->Mbs = oldmat->Mbs; 3207 a->Nbs = oldmat->Nbs; 3208 3209 a->size = oldmat->size; 3210 a->rank = oldmat->rank; 3211 a->donotstash = oldmat->donotstash; 3212 a->roworiented = oldmat->roworiented; 3213 a->rowindices = NULL; 3214 a->rowvalues = NULL; 3215 a->getrowactive = PETSC_FALSE; 3216 a->barray = NULL; 3217 a->rstartbs = oldmat->rstartbs; 3218 a->rendbs = oldmat->rendbs; 3219 a->cstartbs = oldmat->cstartbs; 3220 a->cendbs = oldmat->cendbs; 3221 3222 /* hash table stuff */ 3223 a->ht = NULL; 3224 a->hd = NULL; 3225 a->ht_size = 0; 3226 a->ht_flag = oldmat->ht_flag; 3227 a->ht_fact = oldmat->ht_fact; 3228 a->ht_total_ct = 0; 3229 a->ht_insert_ct = 0; 3230 3231 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3232 if (oldmat->colmap) { 3233 #if defined(PETSC_USE_CTABLE) 3234 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3235 #else 3236 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3237 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3238 #endif 3239 } else a->colmap = NULL; 3240 3241 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3242 PetscCall(PetscMalloc1(len, &a->garray)); 3243 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3244 } else a->garray = NULL; 3245 3246 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3247 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3248 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3249 3250 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3251 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3252 } 3253 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3254 *newmat = mat; 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3259 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3260 { 3261 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3262 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3263 PetscScalar *matvals; 3264 3265 PetscFunctionBegin; 3266 PetscCall(PetscViewerSetUp(viewer)); 3267 3268 /* read in matrix header */ 3269 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3270 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3271 M = header[1]; 3272 N = header[2]; 3273 nz = header[3]; 3274 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3275 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3276 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3277 3278 /* set block sizes from the viewer's .info file */ 3279 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3280 /* set local sizes if not set already */ 3281 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3282 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3283 /* set global sizes if not set already */ 3284 if (mat->rmap->N < 0) mat->rmap->N = M; 3285 if (mat->cmap->N < 0) mat->cmap->N = N; 3286 PetscCall(PetscLayoutSetUp(mat->rmap)); 3287 PetscCall(PetscLayoutSetUp(mat->cmap)); 3288 3289 /* check if the matrix sizes are correct */ 3290 PetscCall(MatGetSize(mat, &rows, &cols)); 3291 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3292 PetscCall(MatGetBlockSize(mat, &bs)); 3293 PetscCall(MatGetLocalSize(mat, &m, &n)); 3294 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3295 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3296 mbs = m / bs; 3297 nbs = n / bs; 3298 3299 /* read in row lengths and build row indices */ 3300 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3301 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3302 rowidxs[0] = 0; 3303 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3304 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3305 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3306 3307 /* read in column indices and matrix values */ 3308 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3309 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3310 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3311 3312 { /* preallocate matrix storage */ 3313 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3314 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3315 PetscBool sbaij, done; 3316 PetscInt *d_nnz, *o_nnz; 3317 3318 PetscCall(PetscBTCreate(nbs, &bt)); 3319 PetscCall(PetscHSetICreate(&ht)); 3320 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3321 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3322 for (i = 0; i < mbs; i++) { 3323 PetscCall(PetscBTMemzero(nbs, bt)); 3324 PetscCall(PetscHSetIClear(ht)); 3325 for (k = 0; k < bs; k++) { 3326 PetscInt row = bs * i + k; 3327 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3328 PetscInt col = colidxs[j]; 3329 if (!sbaij || col >= row) { 3330 if (col >= cs && col < ce) { 3331 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3332 } else { 3333 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3334 if (done) o_nnz[i]++; 3335 } 3336 } 3337 } 3338 } 3339 } 3340 PetscCall(PetscBTDestroy(&bt)); 3341 PetscCall(PetscHSetIDestroy(&ht)); 3342 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3343 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3344 PetscCall(PetscFree2(d_nnz, o_nnz)); 3345 } 3346 3347 /* store matrix values */ 3348 for (i = 0; i < m; i++) { 3349 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3350 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3351 } 3352 3353 PetscCall(PetscFree(rowidxs)); 3354 PetscCall(PetscFree2(colidxs, matvals)); 3355 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3356 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3357 PetscFunctionReturn(PETSC_SUCCESS); 3358 } 3359 3360 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3361 { 3362 PetscBool isbinary; 3363 3364 PetscFunctionBegin; 3365 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3366 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3367 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3368 PetscFunctionReturn(PETSC_SUCCESS); 3369 } 3370 3371 /*@ 3372 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3373 3374 Input Parameters: 3375 + mat - the matrix 3376 - fact - factor 3377 3378 Options Database Key: 3379 . -mat_use_hash_table <fact> - provide the factor 3380 3381 Level: advanced 3382 3383 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3384 @*/ 3385 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3386 { 3387 PetscFunctionBegin; 3388 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3389 PetscFunctionReturn(PETSC_SUCCESS); 3390 } 3391 3392 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3393 { 3394 Mat_MPIBAIJ *baij; 3395 3396 PetscFunctionBegin; 3397 baij = (Mat_MPIBAIJ *)mat->data; 3398 baij->ht_fact = fact; 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } 3401 3402 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3403 { 3404 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3405 PetscBool flg; 3406 3407 PetscFunctionBegin; 3408 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3409 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3410 if (Ad) *Ad = a->A; 3411 if (Ao) *Ao = a->B; 3412 if (colmap) *colmap = a->garray; 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } 3415 3416 /* 3417 Special version for direct calls from Fortran (to eliminate two function call overheads 3418 */ 3419 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3420 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3421 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3422 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3423 #endif 3424 3425 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3426 /*@C 3427 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3428 3429 Collective 3430 3431 Input Parameters: 3432 + matin - the matrix 3433 . min - number of input rows 3434 . im - input rows 3435 . nin - number of input columns 3436 . in - input columns 3437 . v - numerical values input 3438 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3439 3440 Level: advanced 3441 3442 Developer Notes: 3443 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3444 3445 .seealso: `Mat`, `MatSetValuesBlocked()` 3446 @*/ 3447 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3448 { 3449 /* convert input arguments to C version */ 3450 Mat mat = *matin; 3451 PetscInt m = *min, n = *nin; 3452 InsertMode addv = *addvin; 3453 3454 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3455 const MatScalar *value; 3456 MatScalar *barray = baij->barray; 3457 PetscBool roworiented = baij->roworiented; 3458 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3459 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3460 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3461 3462 PetscFunctionBegin; 3463 /* tasks normally handled by MatSetValuesBlocked() */ 3464 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3465 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3466 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3467 if (mat->assembled) { 3468 mat->was_assembled = PETSC_TRUE; 3469 mat->assembled = PETSC_FALSE; 3470 } 3471 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3472 3473 if (!barray) { 3474 PetscCall(PetscMalloc1(bs2, &barray)); 3475 baij->barray = barray; 3476 } 3477 3478 if (roworiented) stepval = (n - 1) * bs; 3479 else stepval = (m - 1) * bs; 3480 3481 for (i = 0; i < m; i++) { 3482 if (im[i] < 0) continue; 3483 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3484 if (im[i] >= rstart && im[i] < rend) { 3485 row = im[i] - rstart; 3486 for (j = 0; j < n; j++) { 3487 /* If NumCol = 1 then a copy is not required */ 3488 if ((roworiented) && (n == 1)) { 3489 barray = (MatScalar *)v + i * bs2; 3490 } else if ((!roworiented) && (m == 1)) { 3491 barray = (MatScalar *)v + j * bs2; 3492 } else { /* Here a copy is required */ 3493 if (roworiented) { 3494 value = v + i * (stepval + bs) * bs + j * bs; 3495 } else { 3496 value = v + j * (stepval + bs) * bs + i * bs; 3497 } 3498 for (ii = 0; ii < bs; ii++, value += stepval) { 3499 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3500 } 3501 barray -= bs2; 3502 } 3503 3504 if (in[j] >= cstart && in[j] < cend) { 3505 col = in[j] - cstart; 3506 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3507 } else if (in[j] < 0) { 3508 continue; 3509 } else { 3510 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3511 if (mat->was_assembled) { 3512 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3513 3514 #if defined(PETSC_USE_DEBUG) 3515 #if defined(PETSC_USE_CTABLE) 3516 { 3517 PetscInt data; 3518 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3519 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3520 } 3521 #else 3522 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3523 #endif 3524 #endif 3525 #if defined(PETSC_USE_CTABLE) 3526 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3527 col = (col - 1) / bs; 3528 #else 3529 col = (baij->colmap[in[j]] - 1) / bs; 3530 #endif 3531 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3532 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3533 col = in[j]; 3534 } 3535 } else col = in[j]; 3536 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3537 } 3538 } 3539 } else { 3540 if (!baij->donotstash) { 3541 if (roworiented) { 3542 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3543 } else { 3544 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3545 } 3546 } 3547 } 3548 } 3549 3550 /* task normally handled by MatSetValuesBlocked() */ 3551 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3552 PetscFunctionReturn(PETSC_SUCCESS); 3553 } 3554 3555 /*@ 3556 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3557 3558 Collective 3559 3560 Input Parameters: 3561 + comm - MPI communicator 3562 . bs - the block size, only a block size of 1 is supported 3563 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3564 . n - This value should be the same as the local size used in creating the 3565 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3566 calculated if `N` is given) For square matrices `n` is almost always `m`. 3567 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3568 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3569 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3570 . j - column indices 3571 - a - matrix values 3572 3573 Output Parameter: 3574 . mat - the matrix 3575 3576 Level: intermediate 3577 3578 Notes: 3579 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3580 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3581 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3582 3583 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3584 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3585 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3586 with column-major ordering within blocks. 3587 3588 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3589 3590 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3591 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3592 @*/ 3593 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3594 { 3595 PetscFunctionBegin; 3596 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3597 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3598 PetscCall(MatCreate(comm, mat)); 3599 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3600 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3601 PetscCall(MatSetBlockSize(*mat, bs)); 3602 PetscCall(MatSetUp(*mat)); 3603 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3604 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3605 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3606 PetscFunctionReturn(PETSC_SUCCESS); 3607 } 3608 3609 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3610 { 3611 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3612 PetscInt *indx; 3613 PetscScalar *values; 3614 3615 PetscFunctionBegin; 3616 PetscCall(MatGetSize(inmat, &m, &N)); 3617 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3619 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3620 PetscInt *bindx, rmax = a->rmax, j; 3621 PetscMPIInt rank, size; 3622 3623 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3624 mbs = m / bs; 3625 Nbs = N / cbs; 3626 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3627 nbs = n / cbs; 3628 3629 PetscCall(PetscMalloc1(rmax, &bindx)); 3630 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3631 3632 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3633 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3634 if (rank == size - 1) { 3635 /* Check sum(nbs) = Nbs */ 3636 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3637 } 3638 3639 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3640 for (i = 0; i < mbs; i++) { 3641 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3642 nnz = nnz / bs; 3643 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3644 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3645 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3646 } 3647 PetscCall(PetscFree(bindx)); 3648 3649 PetscCall(MatCreate(comm, outmat)); 3650 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3651 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3652 PetscCall(MatSetType(*outmat, MATBAIJ)); 3653 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3654 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3655 MatPreallocateEnd(dnz, onz); 3656 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3657 } 3658 3659 /* numeric phase */ 3660 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3661 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3662 3663 for (i = 0; i < m; i++) { 3664 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3665 Ii = i + rstart; 3666 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3667 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3668 } 3669 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3670 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3671 PetscFunctionReturn(PETSC_SUCCESS); 3672 } 3673