1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)(B)->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp, *tmp2; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 721 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 722 v = amat->a; 723 jj = amat->j; 724 for (i = 0; i < amat->nz; i++) { 725 for (j = 0; j < bs; j++) { 726 col = bs * (cstart + *jj) + j; /* column index */ 727 for (row = 0; row < bs; row++) { 728 tmp[col] += PetscAbsScalar(*v); 729 v++; 730 } 731 } 732 jj++; 733 } 734 v = bmat->a; 735 jj = bmat->j; 736 for (i = 0; i < bmat->nz; i++) { 737 for (j = 0; j < bs; j++) { 738 col = bs * garray[*jj] + j; 739 for (row = 0; row < bs; row++) { 740 tmp[col] += PetscAbsScalar(*v); 741 v++; 742 } 743 } 744 jj++; 745 } 746 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 747 *nrm = 0.0; 748 for (j = 0; j < mat->cmap->N; j++) { 749 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 750 } 751 PetscCall(PetscFree(tmp)); 752 PetscCall(PetscFree(tmp2)); 753 } else if (type == NORM_INFINITY) { /* max row sum */ 754 PetscReal *sums; 755 PetscCall(PetscMalloc1(bs, &sums)); 756 sum = 0.0; 757 for (j = 0; j < amat->mbs; j++) { 758 for (row = 0; row < bs; row++) sums[row] = 0.0; 759 v = amat->a + bs2 * amat->i[j]; 760 nz = amat->i[j + 1] - amat->i[j]; 761 for (i = 0; i < nz; i++) { 762 for (col = 0; col < bs; col++) { 763 for (row = 0; row < bs; row++) { 764 sums[row] += PetscAbsScalar(*v); 765 v++; 766 } 767 } 768 } 769 v = bmat->a + bs2 * bmat->i[j]; 770 nz = bmat->i[j + 1] - bmat->i[j]; 771 for (i = 0; i < nz; i++) { 772 for (col = 0; col < bs; col++) { 773 for (row = 0; row < bs; row++) { 774 sums[row] += PetscAbsScalar(*v); 775 v++; 776 } 777 } 778 } 779 for (row = 0; row < bs; row++) { 780 if (sums[row] > sum) sum = sums[row]; 781 } 782 } 783 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 784 PetscCall(PetscFree(sums)); 785 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 786 } 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 /* 791 Creates the hash table, and sets the table 792 This table is created only once. 793 If new entried need to be added to the matrix 794 then the hash table has to be destroyed and 795 recreated. 796 */ 797 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 800 Mat A = baij->A, B = baij->B; 801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 802 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 803 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 804 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 805 PetscInt *HT, key; 806 MatScalar **HD; 807 PetscReal tmp; 808 #if defined(PETSC_USE_INFO) 809 PetscInt ct = 0, max = 0; 810 #endif 811 812 PetscFunctionBegin; 813 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 814 815 baij->ht_size = (PetscInt)(factor * nz); 816 ht_size = baij->ht_size; 817 818 /* Allocate Memory for Hash Table */ 819 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 820 HD = baij->hd; 821 HT = baij->ht; 822 823 /* Loop Over A */ 824 for (i = 0; i < a->mbs; i++) { 825 for (j = ai[i]; j < ai[i + 1]; j++) { 826 row = i + rstart; 827 col = aj[j] + cstart; 828 829 key = row * Nbs + col + 1; 830 h1 = HASH(ht_size, key, tmp); 831 for (k = 0; k < ht_size; k++) { 832 if (!HT[(h1 + k) % ht_size]) { 833 HT[(h1 + k) % ht_size] = key; 834 HD[(h1 + k) % ht_size] = a->a + j * bs2; 835 break; 836 #if defined(PETSC_USE_INFO) 837 } else { 838 ct++; 839 #endif 840 } 841 } 842 #if defined(PETSC_USE_INFO) 843 if (k > max) max = k; 844 #endif 845 } 846 } 847 /* Loop Over B */ 848 for (i = 0; i < b->mbs; i++) { 849 for (j = bi[i]; j < bi[i + 1]; j++) { 850 row = i + rstart; 851 col = garray[bj[j]]; 852 key = row * Nbs + col + 1; 853 h1 = HASH(ht_size, key, tmp); 854 for (k = 0; k < ht_size; k++) { 855 if (!HT[(h1 + k) % ht_size]) { 856 HT[(h1 + k) % ht_size] = key; 857 HD[(h1 + k) % ht_size] = b->a + j * bs2; 858 break; 859 #if defined(PETSC_USE_INFO) 860 } else { 861 ct++; 862 #endif 863 } 864 } 865 #if defined(PETSC_USE_INFO) 866 if (k > max) max = k; 867 #endif 868 } 869 } 870 871 /* Print Summary */ 872 #if defined(PETSC_USE_INFO) 873 for (i = 0, j = 0; i < ht_size; i++) { 874 if (HT[i]) j++; 875 } 876 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 877 #endif 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 882 { 883 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 884 PetscInt nstash, reallocs; 885 886 PetscFunctionBegin; 887 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 888 889 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 890 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 891 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 892 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 893 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 899 { 900 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 902 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 903 PetscInt *row, *col; 904 PetscBool r1, r2, r3, other_disassembled; 905 MatScalar *val; 906 PetscMPIInt n; 907 908 PetscFunctionBegin; 909 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 910 if (!baij->donotstash && !mat->nooffprocentries) { 911 while (1) { 912 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 913 if (!flg) break; 914 915 for (i = 0; i < n;) { 916 /* Now identify the consecutive vals belonging to the same row */ 917 for (j = i, rstart = row[j]; j < n; j++) { 918 if (row[j] != rstart) break; 919 } 920 if (j < n) ncols = j - i; 921 else ncols = n - i; 922 /* Now assemble all these values with a single function call */ 923 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 924 i = j; 925 } 926 } 927 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 928 /* Now process the block-stash. Since the values are stashed column-oriented, 929 set the row-oriented flag to column-oriented, and after MatSetValues() 930 restore the original flags */ 931 r1 = baij->roworiented; 932 r2 = a->roworiented; 933 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 934 935 baij->roworiented = PETSC_FALSE; 936 a->roworiented = PETSC_FALSE; 937 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 938 while (1) { 939 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 940 if (!flg) break; 941 942 for (i = 0; i < n;) { 943 /* Now identify the consecutive vals belonging to the same row */ 944 for (j = i, rstart = row[j]; j < n; j++) { 945 if (row[j] != rstart) break; 946 } 947 if (j < n) ncols = j - i; 948 else ncols = n - i; 949 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 950 i = j; 951 } 952 } 953 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 954 955 baij->roworiented = r1; 956 a->roworiented = r2; 957 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 958 } 959 960 PetscCall(MatAssemblyBegin(baij->A, mode)); 961 PetscCall(MatAssemblyEnd(baij->A, mode)); 962 963 /* determine if any processor has disassembled, if so we must 964 also disassemble ourselves, in order that we may reassemble. */ 965 /* 966 if nonzero structure of submatrix B cannot change then we know that 967 no processor disassembled thus we can skip this stuff 968 */ 969 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 970 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 971 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 972 } 973 974 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 975 PetscCall(MatAssemblyBegin(baij->B, mode)); 976 PetscCall(MatAssemblyEnd(baij->B, mode)); 977 978 #if defined(PETSC_USE_INFO) 979 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 980 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 981 982 baij->ht_total_ct = 0; 983 baij->ht_insert_ct = 0; 984 } 985 #endif 986 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 987 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 988 989 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 990 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 991 } 992 993 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 994 995 baij->rowvalues = NULL; 996 997 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 998 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 999 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1000 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1001 } 1002 PetscFunctionReturn(PETSC_SUCCESS); 1003 } 1004 1005 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1006 #include <petscdraw.h> 1007 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1008 { 1009 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1010 PetscMPIInt rank = baij->rank; 1011 PetscInt bs = mat->rmap->bs; 1012 PetscBool iascii, isdraw; 1013 PetscViewer sviewer; 1014 PetscViewerFormat format; 1015 1016 PetscFunctionBegin; 1017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1019 if (iascii) { 1020 PetscCall(PetscViewerGetFormat(viewer, &format)); 1021 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1022 MatInfo info; 1023 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1024 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1025 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1026 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1027 mat->rmap->bs, (double)info.memory)); 1028 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1030 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1032 PetscCall(PetscViewerFlush(viewer)); 1033 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1034 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1035 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1038 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 } 1044 1045 if (isdraw) { 1046 PetscDraw draw; 1047 PetscBool isnull; 1048 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1049 PetscCall(PetscDrawIsNull(draw, &isnull)); 1050 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1051 } 1052 1053 { 1054 /* assemble the entire matrix onto first processor. */ 1055 Mat A; 1056 Mat_SeqBAIJ *Aloc; 1057 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1058 MatScalar *a; 1059 const char *matname; 1060 1061 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1062 /* Perhaps this should be the type of mat? */ 1063 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1064 if (rank == 0) { 1065 PetscCall(MatSetSizes(A, M, N, M, N)); 1066 } else { 1067 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1068 } 1069 PetscCall(MatSetType(A, MATMPIBAIJ)); 1070 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1071 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1072 1073 /* copy over the A part */ 1074 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1075 ai = Aloc->i; 1076 aj = Aloc->j; 1077 a = Aloc->a; 1078 PetscCall(PetscMalloc1(bs, &rvals)); 1079 1080 for (i = 0; i < mbs; i++) { 1081 rvals[0] = bs * (baij->rstartbs + i); 1082 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1083 for (j = ai[i]; j < ai[i + 1]; j++) { 1084 col = (baij->cstartbs + aj[j]) * bs; 1085 for (k = 0; k < bs; k++) { 1086 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1087 col++; 1088 a += bs; 1089 } 1090 } 1091 } 1092 /* copy over the B part */ 1093 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1094 ai = Aloc->i; 1095 aj = Aloc->j; 1096 a = Aloc->a; 1097 for (i = 0; i < mbs; i++) { 1098 rvals[0] = bs * (baij->rstartbs + i); 1099 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1100 for (j = ai[i]; j < ai[i + 1]; j++) { 1101 col = baij->garray[aj[j]] * bs; 1102 for (k = 0; k < bs; k++) { 1103 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1104 col++; 1105 a += bs; 1106 } 1107 } 1108 } 1109 PetscCall(PetscFree(rvals)); 1110 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1111 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1112 /* 1113 Everyone has to call to draw the matrix since the graphics waits are 1114 synchronized across all processors that share the PetscDraw object 1115 */ 1116 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1117 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1118 if (rank == 0) { 1119 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1120 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1121 } 1122 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1123 PetscCall(MatDestroy(&A)); 1124 } 1125 PetscFunctionReturn(PETSC_SUCCESS); 1126 } 1127 1128 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1129 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1130 { 1131 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1132 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1133 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1134 const PetscInt *garray = aij->garray; 1135 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1136 PetscInt64 nz, hnz; 1137 PetscInt *rowlens, *colidxs; 1138 PetscScalar *matvals; 1139 PetscMPIInt rank; 1140 1141 PetscFunctionBegin; 1142 PetscCall(PetscViewerSetUp(viewer)); 1143 1144 M = mat->rmap->N; 1145 N = mat->cmap->N; 1146 m = mat->rmap->n; 1147 rs = mat->rmap->rstart; 1148 cs = mat->cmap->rstart; 1149 bs = mat->rmap->bs; 1150 nz = bs * bs * (A->nz + B->nz); 1151 1152 /* write matrix header */ 1153 header[0] = MAT_FILE_CLASSID; 1154 header[1] = M; 1155 header[2] = N; 1156 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1157 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1158 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1159 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1160 1161 /* fill in and store row lengths */ 1162 PetscCall(PetscMalloc1(m, &rowlens)); 1163 for (cnt = 0, i = 0; i < A->mbs; i++) 1164 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1165 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1166 PetscCall(PetscFree(rowlens)); 1167 1168 /* fill in and store column indices */ 1169 PetscCall(PetscMalloc1(nz, &colidxs)); 1170 for (cnt = 0, i = 0; i < A->mbs; i++) { 1171 for (k = 0; k < bs; k++) { 1172 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1173 if (garray[B->j[jb]] > cs / bs) break; 1174 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1175 } 1176 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1178 for (; jb < B->i[i + 1]; jb++) 1179 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1180 } 1181 } 1182 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1183 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1184 PetscCall(PetscFree(colidxs)); 1185 1186 /* fill in and store nonzero values */ 1187 PetscCall(PetscMalloc1(nz, &matvals)); 1188 for (cnt = 0, i = 0; i < A->mbs; i++) { 1189 for (k = 0; k < bs; k++) { 1190 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1191 if (garray[B->j[jb]] > cs / bs) break; 1192 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1193 } 1194 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1195 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1196 for (; jb < B->i[i + 1]; jb++) 1197 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1198 } 1199 } 1200 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1201 PetscCall(PetscFree(matvals)); 1202 1203 /* write block size option to the viewer's .info file */ 1204 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1205 PetscFunctionReturn(PETSC_SUCCESS); 1206 } 1207 1208 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1209 { 1210 PetscBool iascii, isdraw, issocket, isbinary; 1211 1212 PetscFunctionBegin; 1213 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1214 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1217 if (iascii || isdraw || issocket) { 1218 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1219 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1224 { 1225 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1226 PetscInt nt; 1227 1228 PetscFunctionBegin; 1229 PetscCall(VecGetLocalSize(xx, &nt)); 1230 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1231 PetscCall(VecGetLocalSize(yy, &nt)); 1232 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1233 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1234 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1235 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1237 PetscFunctionReturn(PETSC_SUCCESS); 1238 } 1239 1240 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1241 { 1242 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1243 1244 PetscFunctionBegin; 1245 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1246 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1247 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1255 1256 PetscFunctionBegin; 1257 /* do nondiagonal part */ 1258 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1259 /* do local part */ 1260 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1261 /* add partial results together */ 1262 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1263 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 1267 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1270 1271 PetscFunctionBegin; 1272 /* do nondiagonal part */ 1273 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1274 /* do local part */ 1275 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1276 /* add partial results together */ 1277 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1278 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /* 1283 This only works correctly for square matrices where the subblock A->A is the 1284 diagonal block 1285 */ 1286 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1287 { 1288 PetscFunctionBegin; 1289 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1290 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1291 PetscFunctionReturn(PETSC_SUCCESS); 1292 } 1293 1294 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1295 { 1296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1297 1298 PetscFunctionBegin; 1299 PetscCall(MatScale(a->A, aa)); 1300 PetscCall(MatScale(a->B, aa)); 1301 PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 1304 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1305 { 1306 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1307 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1308 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1309 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1310 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1311 1312 PetscFunctionBegin; 1313 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1314 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1315 mat->getrowactive = PETSC_TRUE; 1316 1317 if (!mat->rowvalues && (idx || v)) { 1318 /* 1319 allocate enough space to hold information from the longest row. 1320 */ 1321 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1322 PetscInt max = 1, mbs = mat->mbs, tmp; 1323 for (i = 0; i < mbs; i++) { 1324 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1325 if (max < tmp) max = tmp; 1326 } 1327 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1328 } 1329 lrow = row - brstart; 1330 1331 pvA = &vworkA; 1332 pcA = &cworkA; 1333 pvB = &vworkB; 1334 pcB = &cworkB; 1335 if (!v) { 1336 pvA = NULL; 1337 pvB = NULL; 1338 } 1339 if (!idx) { 1340 pcA = NULL; 1341 if (!v) pcB = NULL; 1342 } 1343 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1344 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1345 nztot = nzA + nzB; 1346 1347 cmap = mat->garray; 1348 if (v || idx) { 1349 if (nztot) { 1350 /* Sort by increasing column numbers, assuming A and B already sorted */ 1351 PetscInt imark = -1; 1352 if (v) { 1353 *v = v_p = mat->rowvalues; 1354 for (i = 0; i < nzB; i++) { 1355 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1356 else break; 1357 } 1358 imark = i; 1359 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1360 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1361 } 1362 if (idx) { 1363 *idx = idx_p = mat->rowindices; 1364 if (imark > -1) { 1365 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1366 } else { 1367 for (i = 0; i < nzB; i++) { 1368 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 else break; 1370 } 1371 imark = i; 1372 } 1373 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1374 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1375 } 1376 } else { 1377 if (idx) *idx = NULL; 1378 if (v) *v = NULL; 1379 } 1380 } 1381 *nz = nztot; 1382 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1383 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1388 { 1389 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1390 1391 PetscFunctionBegin; 1392 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1393 baij->getrowactive = PETSC_FALSE; 1394 PetscFunctionReturn(PETSC_SUCCESS); 1395 } 1396 1397 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1398 { 1399 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1400 1401 PetscFunctionBegin; 1402 PetscCall(MatZeroEntries(l->A)); 1403 PetscCall(MatZeroEntries(l->B)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1408 { 1409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1410 Mat A = a->A, B = a->B; 1411 PetscLogDouble isend[5], irecv[5]; 1412 1413 PetscFunctionBegin; 1414 info->block_size = (PetscReal)matin->rmap->bs; 1415 1416 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1417 1418 isend[0] = info->nz_used; 1419 isend[1] = info->nz_allocated; 1420 isend[2] = info->nz_unneeded; 1421 isend[3] = info->memory; 1422 isend[4] = info->mallocs; 1423 1424 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1425 1426 isend[0] += info->nz_used; 1427 isend[1] += info->nz_allocated; 1428 isend[2] += info->nz_unneeded; 1429 isend[3] += info->memory; 1430 isend[4] += info->mallocs; 1431 1432 if (flag == MAT_LOCAL) { 1433 info->nz_used = isend[0]; 1434 info->nz_allocated = isend[1]; 1435 info->nz_unneeded = isend[2]; 1436 info->memory = isend[3]; 1437 info->mallocs = isend[4]; 1438 } else if (flag == MAT_GLOBAL_MAX) { 1439 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1440 1441 info->nz_used = irecv[0]; 1442 info->nz_allocated = irecv[1]; 1443 info->nz_unneeded = irecv[2]; 1444 info->memory = irecv[3]; 1445 info->mallocs = irecv[4]; 1446 } else if (flag == MAT_GLOBAL_SUM) { 1447 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1448 1449 info->nz_used = irecv[0]; 1450 info->nz_allocated = irecv[1]; 1451 info->nz_unneeded = irecv[2]; 1452 info->memory = irecv[3]; 1453 info->mallocs = irecv[4]; 1454 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1455 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1462 { 1463 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1464 1465 PetscFunctionBegin; 1466 switch (op) { 1467 case MAT_NEW_NONZERO_LOCATIONS: 1468 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1469 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1470 case MAT_KEEP_NONZERO_PATTERN: 1471 case MAT_NEW_NONZERO_LOCATION_ERR: 1472 MatCheckPreallocated(A, 1); 1473 PetscCall(MatSetOption(a->A, op, flg)); 1474 PetscCall(MatSetOption(a->B, op, flg)); 1475 break; 1476 case MAT_ROW_ORIENTED: 1477 MatCheckPreallocated(A, 1); 1478 a->roworiented = flg; 1479 1480 PetscCall(MatSetOption(a->A, op, flg)); 1481 PetscCall(MatSetOption(a->B, op, flg)); 1482 break; 1483 case MAT_FORCE_DIAGONAL_ENTRIES: 1484 case MAT_SORTED_FULL: 1485 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1486 break; 1487 case MAT_IGNORE_OFF_PROC_ENTRIES: 1488 a->donotstash = flg; 1489 break; 1490 case MAT_USE_HASH_TABLE: 1491 a->ht_flag = flg; 1492 a->ht_fact = 1.39; 1493 break; 1494 case MAT_SYMMETRIC: 1495 case MAT_STRUCTURALLY_SYMMETRIC: 1496 case MAT_HERMITIAN: 1497 case MAT_SUBMAT_SINGLEIS: 1498 case MAT_SYMMETRY_ETERNAL: 1499 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1500 case MAT_SPD_ETERNAL: 1501 /* if the diagonal matrix is square it inherits some of the properties above */ 1502 break; 1503 default: 1504 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1505 } 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1512 Mat_SeqBAIJ *Aloc; 1513 Mat B; 1514 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1515 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1516 MatScalar *a; 1517 1518 PetscFunctionBegin; 1519 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1520 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1521 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1522 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1523 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1524 /* Do not know preallocation information, but must set block size */ 1525 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1526 } else { 1527 B = *matout; 1528 } 1529 1530 /* copy over the A part */ 1531 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1532 ai = Aloc->i; 1533 aj = Aloc->j; 1534 a = Aloc->a; 1535 PetscCall(PetscMalloc1(bs, &rvals)); 1536 1537 for (i = 0; i < mbs; i++) { 1538 rvals[0] = bs * (baij->rstartbs + i); 1539 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1540 for (j = ai[i]; j < ai[i + 1]; j++) { 1541 col = (baij->cstartbs + aj[j]) * bs; 1542 for (k = 0; k < bs; k++) { 1543 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1544 1545 col++; 1546 a += bs; 1547 } 1548 } 1549 } 1550 /* copy over the B part */ 1551 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1552 ai = Aloc->i; 1553 aj = Aloc->j; 1554 a = Aloc->a; 1555 for (i = 0; i < mbs; i++) { 1556 rvals[0] = bs * (baij->rstartbs + i); 1557 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1558 for (j = ai[i]; j < ai[i + 1]; j++) { 1559 col = baij->garray[aj[j]] * bs; 1560 for (k = 0; k < bs; k++) { 1561 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1562 col++; 1563 a += bs; 1564 } 1565 } 1566 } 1567 PetscCall(PetscFree(rvals)); 1568 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1569 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1570 1571 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1572 else PetscCall(MatHeaderMerge(A, &B)); 1573 PetscFunctionReturn(PETSC_SUCCESS); 1574 } 1575 1576 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1577 { 1578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1579 Mat a = baij->A, b = baij->B; 1580 PetscInt s1, s2, s3; 1581 1582 PetscFunctionBegin; 1583 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1584 if (rr) { 1585 PetscCall(VecGetLocalSize(rr, &s1)); 1586 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1587 /* Overlap communication with computation. */ 1588 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1589 } 1590 if (ll) { 1591 PetscCall(VecGetLocalSize(ll, &s1)); 1592 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1593 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1594 } 1595 /* scale the diagonal block */ 1596 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1597 1598 if (rr) { 1599 /* Do a scatter end and then right scale the off-diagonal block */ 1600 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1601 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1602 } 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1607 { 1608 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1609 PetscInt *lrows; 1610 PetscInt r, len; 1611 PetscBool cong; 1612 1613 PetscFunctionBegin; 1614 /* get locally owned rows */ 1615 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1616 /* fix right-hand side if needed */ 1617 if (x && b) { 1618 const PetscScalar *xx; 1619 PetscScalar *bb; 1620 1621 PetscCall(VecGetArrayRead(x, &xx)); 1622 PetscCall(VecGetArray(b, &bb)); 1623 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1624 PetscCall(VecRestoreArrayRead(x, &xx)); 1625 PetscCall(VecRestoreArray(b, &bb)); 1626 } 1627 1628 /* actually zap the local rows */ 1629 /* 1630 Zero the required rows. If the "diagonal block" of the matrix 1631 is square and the user wishes to set the diagonal we use separate 1632 code so that MatSetValues() is not called for each diagonal allocating 1633 new memory, thus calling lots of mallocs and slowing things down. 1634 1635 */ 1636 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1637 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1638 PetscCall(MatHasCongruentLayouts(A, &cong)); 1639 if ((diag != 0.0) && cong) { 1640 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1641 } else if (diag != 0.0) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1643 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1644 for (r = 0; r < len; ++r) { 1645 const PetscInt row = lrows[r] + A->rmap->rstart; 1646 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1647 } 1648 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1650 } else { 1651 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1652 } 1653 PetscCall(PetscFree(lrows)); 1654 1655 /* only change matrix nonzero state if pattern was allowed to be changed */ 1656 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1657 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1658 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1659 } 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1664 { 1665 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1666 PetscMPIInt n = A->rmap->n, p = 0; 1667 PetscInt i, j, k, r, len = 0, row, col, count; 1668 PetscInt *lrows, *owners = A->rmap->range; 1669 PetscSFNode *rrows; 1670 PetscSF sf; 1671 const PetscScalar *xx; 1672 PetscScalar *bb, *mask; 1673 Vec xmask, lmask; 1674 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1675 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1676 PetscScalar *aa; 1677 1678 PetscFunctionBegin; 1679 /* Create SF where leaves are input rows and roots are owned rows */ 1680 PetscCall(PetscMalloc1(n, &lrows)); 1681 for (r = 0; r < n; ++r) lrows[r] = -1; 1682 PetscCall(PetscMalloc1(N, &rrows)); 1683 for (r = 0; r < N; ++r) { 1684 const PetscInt idx = rows[r]; 1685 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1686 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1687 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1688 } 1689 rrows[r].rank = p; 1690 rrows[r].index = rows[r] - owners[p]; 1691 } 1692 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1693 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1694 /* Collect flags for rows to be zeroed */ 1695 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1696 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1697 PetscCall(PetscSFDestroy(&sf)); 1698 /* Compress and put in row numbers */ 1699 for (r = 0; r < n; ++r) 1700 if (lrows[r] >= 0) lrows[len++] = r; 1701 /* zero diagonal part of matrix */ 1702 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1703 /* handle off-diagonal part of matrix */ 1704 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1705 PetscCall(VecDuplicate(l->lvec, &lmask)); 1706 PetscCall(VecGetArray(xmask, &bb)); 1707 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1708 PetscCall(VecRestoreArray(xmask, &bb)); 1709 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1710 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecDestroy(&xmask)); 1712 if (x) { 1713 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1716 PetscCall(VecGetArray(b, &bb)); 1717 } 1718 PetscCall(VecGetArray(lmask, &mask)); 1719 /* remove zeroed rows of off-diagonal matrix */ 1720 for (i = 0; i < len; ++i) { 1721 row = lrows[i]; 1722 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1723 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1724 for (k = 0; k < count; ++k) { 1725 aa[0] = 0.0; 1726 aa += bs; 1727 } 1728 } 1729 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1730 for (i = 0; i < l->B->rmap->N; ++i) { 1731 row = i / bs; 1732 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1733 for (k = 0; k < bs; ++k) { 1734 col = bs * baij->j[j] + k; 1735 if (PetscAbsScalar(mask[col])) { 1736 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1737 if (x) bb[i] -= aa[0] * xx[col]; 1738 aa[0] = 0.0; 1739 } 1740 } 1741 } 1742 } 1743 if (x) { 1744 PetscCall(VecRestoreArray(b, &bb)); 1745 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1746 } 1747 PetscCall(VecRestoreArray(lmask, &mask)); 1748 PetscCall(VecDestroy(&lmask)); 1749 PetscCall(PetscFree(lrows)); 1750 1751 /* only change matrix nonzero state if pattern was allowed to be changed */ 1752 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1753 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1754 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1755 } 1756 PetscFunctionReturn(PETSC_SUCCESS); 1757 } 1758 1759 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1760 { 1761 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1762 1763 PetscFunctionBegin; 1764 PetscCall(MatSetUnfactored(a->A)); 1765 PetscFunctionReturn(PETSC_SUCCESS); 1766 } 1767 1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1769 1770 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1771 { 1772 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1773 Mat a, b, c, d; 1774 PetscBool flg; 1775 1776 PetscFunctionBegin; 1777 a = matA->A; 1778 b = matA->B; 1779 c = matB->A; 1780 d = matB->B; 1781 1782 PetscCall(MatEqual(a, c, &flg)); 1783 if (flg) PetscCall(MatEqual(b, d, &flg)); 1784 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1789 { 1790 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1791 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1792 1793 PetscFunctionBegin; 1794 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1795 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1796 PetscCall(MatCopy_Basic(A, B, str)); 1797 } else { 1798 PetscCall(MatCopy(a->A, b->A, str)); 1799 PetscCall(MatCopy(a->B, b->B, str)); 1800 } 1801 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1806 { 1807 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1808 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1809 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1810 1811 PetscFunctionBegin; 1812 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1817 { 1818 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1819 PetscBLASInt bnz, one = 1; 1820 Mat_SeqBAIJ *x, *y; 1821 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1822 1823 PetscFunctionBegin; 1824 if (str == SAME_NONZERO_PATTERN) { 1825 PetscScalar alpha = a; 1826 x = (Mat_SeqBAIJ *)xx->A->data; 1827 y = (Mat_SeqBAIJ *)yy->A->data; 1828 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1829 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1830 x = (Mat_SeqBAIJ *)xx->B->data; 1831 y = (Mat_SeqBAIJ *)yy->B->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1835 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1836 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1837 } else { 1838 Mat B; 1839 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1840 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1841 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1842 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1843 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1844 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1845 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1846 PetscCall(MatSetType(B, MATMPIBAIJ)); 1847 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1848 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1849 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1850 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1851 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1852 PetscCall(MatHeaderMerge(Y, &B)); 1853 PetscCall(PetscFree(nnz_d)); 1854 PetscCall(PetscFree(nnz_o)); 1855 } 1856 PetscFunctionReturn(PETSC_SUCCESS); 1857 } 1858 1859 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1860 { 1861 PetscFunctionBegin; 1862 if (PetscDefined(USE_COMPLEX)) { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1864 1865 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1866 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1867 } 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 1871 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1872 { 1873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1874 1875 PetscFunctionBegin; 1876 PetscCall(MatRealPart(a->A)); 1877 PetscCall(MatRealPart(a->B)); 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1882 { 1883 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1884 1885 PetscFunctionBegin; 1886 PetscCall(MatImaginaryPart(a->A)); 1887 PetscCall(MatImaginaryPart(a->B)); 1888 PetscFunctionReturn(PETSC_SUCCESS); 1889 } 1890 1891 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1892 { 1893 IS iscol_local; 1894 PetscInt csize; 1895 1896 PetscFunctionBegin; 1897 PetscCall(ISGetLocalSize(iscol, &csize)); 1898 if (call == MAT_REUSE_MATRIX) { 1899 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1900 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1901 } else { 1902 PetscCall(ISAllGather(iscol, &iscol_local)); 1903 } 1904 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1905 if (call == MAT_INITIAL_MATRIX) { 1906 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1907 PetscCall(ISDestroy(&iscol_local)); 1908 } 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 /* 1913 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1914 in local and then by concatenating the local matrices the end result. 1915 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1916 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1917 */ 1918 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1919 { 1920 PetscMPIInt rank, size; 1921 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1922 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1923 Mat M, Mreuse; 1924 MatScalar *vwork, *aa; 1925 MPI_Comm comm; 1926 IS isrow_new, iscol_new; 1927 Mat_SeqBAIJ *aij; 1928 1929 PetscFunctionBegin; 1930 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1931 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1932 PetscCallMPI(MPI_Comm_size(comm, &size)); 1933 /* The compression and expansion should be avoided. Doesn't point 1934 out errors, might change the indices, hence buggey */ 1935 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1936 if (isrow == iscol) { 1937 iscol_new = isrow_new; 1938 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1939 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1940 1941 if (call == MAT_REUSE_MATRIX) { 1942 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1943 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1944 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1945 } else { 1946 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1947 } 1948 PetscCall(ISDestroy(&isrow_new)); 1949 PetscCall(ISDestroy(&iscol_new)); 1950 /* 1951 m - number of local rows 1952 n - number of columns (same on all processors) 1953 rstart - first row in new global matrix generated 1954 */ 1955 PetscCall(MatGetBlockSize(mat, &bs)); 1956 PetscCall(MatGetSize(Mreuse, &m, &n)); 1957 m = m / bs; 1958 n = n / bs; 1959 1960 if (call == MAT_INITIAL_MATRIX) { 1961 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1962 ii = aij->i; 1963 jj = aij->j; 1964 1965 /* 1966 Determine the number of non-zeros in the diagonal and off-diagonal 1967 portions of the matrix in order to do correct preallocation 1968 */ 1969 1970 /* first get start and end of "diagonal" columns */ 1971 if (csize == PETSC_DECIDE) { 1972 PetscCall(ISGetSize(isrow, &mglobal)); 1973 if (mglobal == n * bs) { /* square matrix */ 1974 nlocal = m; 1975 } else { 1976 nlocal = n / size + ((n % size) > rank); 1977 } 1978 } else { 1979 nlocal = csize / bs; 1980 } 1981 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1982 rstart = rend - nlocal; 1983 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1984 1985 /* next, compute all the lengths */ 1986 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1987 for (i = 0; i < m; i++) { 1988 jend = ii[i + 1] - ii[i]; 1989 olen = 0; 1990 dlen = 0; 1991 for (j = 0; j < jend; j++) { 1992 if (*jj < rstart || *jj >= rend) olen++; 1993 else dlen++; 1994 jj++; 1995 } 1996 olens[i] = olen; 1997 dlens[i] = dlen; 1998 } 1999 PetscCall(MatCreate(comm, &M)); 2000 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2001 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2002 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2003 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2004 PetscCall(PetscFree2(dlens, olens)); 2005 } else { 2006 PetscInt ml, nl; 2007 2008 M = *newmat; 2009 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2010 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2011 PetscCall(MatZeroEntries(M)); 2012 /* 2013 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2014 rather than the slower MatSetValues(). 2015 */ 2016 M->was_assembled = PETSC_TRUE; 2017 M->assembled = PETSC_FALSE; 2018 } 2019 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2020 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2021 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2022 ii = aij->i; 2023 jj = aij->j; 2024 aa = aij->a; 2025 for (i = 0; i < m; i++) { 2026 row = rstart / bs + i; 2027 nz = ii[i + 1] - ii[i]; 2028 cwork = jj; 2029 jj = PetscSafePointerPlusOffset(jj, nz); 2030 vwork = aa; 2031 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2032 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2033 } 2034 2035 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2036 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2037 *newmat = M; 2038 2039 /* save submatrix used in processor for next request */ 2040 if (call == MAT_INITIAL_MATRIX) { 2041 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2042 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2043 } 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2048 { 2049 MPI_Comm comm, pcomm; 2050 PetscInt clocal_size, nrows; 2051 const PetscInt *rows; 2052 PetscMPIInt size; 2053 IS crowp, lcolp; 2054 2055 PetscFunctionBegin; 2056 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2057 /* make a collective version of 'rowp' */ 2058 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2059 if (pcomm == comm) { 2060 crowp = rowp; 2061 } else { 2062 PetscCall(ISGetSize(rowp, &nrows)); 2063 PetscCall(ISGetIndices(rowp, &rows)); 2064 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2065 PetscCall(ISRestoreIndices(rowp, &rows)); 2066 } 2067 PetscCall(ISSetPermutation(crowp)); 2068 /* make a local version of 'colp' */ 2069 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2070 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2071 if (size == 1) { 2072 lcolp = colp; 2073 } else { 2074 PetscCall(ISAllGather(colp, &lcolp)); 2075 } 2076 PetscCall(ISSetPermutation(lcolp)); 2077 /* now we just get the submatrix */ 2078 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2079 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2080 /* clean up */ 2081 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2082 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2087 { 2088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2089 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2090 2091 PetscFunctionBegin; 2092 if (nghosts) *nghosts = B->nbs; 2093 if (ghosts) *ghosts = baij->garray; 2094 PetscFunctionReturn(PETSC_SUCCESS); 2095 } 2096 2097 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2098 { 2099 Mat B; 2100 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2101 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2102 Mat_SeqAIJ *b; 2103 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2104 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2105 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2106 2107 PetscFunctionBegin; 2108 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2109 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2110 2111 /* Tell every processor the number of nonzeros per row */ 2112 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2113 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2114 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2115 displs = recvcounts + size; 2116 for (i = 0; i < size; i++) { 2117 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2118 displs[i] = A->rmap->range[i] / bs; 2119 } 2120 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2121 /* Create the sequential matrix of the same type as the local block diagonal */ 2122 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2123 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2124 PetscCall(MatSetType(B, MATSEQAIJ)); 2125 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2126 b = (Mat_SeqAIJ *)B->data; 2127 2128 /* Copy my part of matrix column indices over */ 2129 sendcount = ad->nz + bd->nz; 2130 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2131 a_jsendbuf = ad->j; 2132 b_jsendbuf = bd->j; 2133 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2134 cnt = 0; 2135 for (i = 0; i < n; i++) { 2136 /* put in lower diagonal portion */ 2137 m = bd->i[i + 1] - bd->i[i]; 2138 while (m > 0) { 2139 /* is it above diagonal (in bd (compressed) numbering) */ 2140 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2141 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2142 m--; 2143 } 2144 2145 /* put in diagonal portion */ 2146 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2147 2148 /* put in upper diagonal portion */ 2149 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 } 2151 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2152 2153 /* Gather all column indices to all processors */ 2154 for (i = 0; i < size; i++) { 2155 recvcounts[i] = 0; 2156 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2157 } 2158 displs[0] = 0; 2159 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2160 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2161 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2162 /* set the b->ilen (length of each row) values */ 2163 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2164 /* set the b->i indices */ 2165 b->i[0] = 0; 2166 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2167 PetscCall(PetscFree(lens)); 2168 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2169 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2170 PetscCall(PetscFree(recvcounts)); 2171 2172 PetscCall(MatPropagateSymmetryOptions(A, B)); 2173 *newmat = B; 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2178 { 2179 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2180 Vec bb1 = NULL; 2181 2182 PetscFunctionBegin; 2183 if (flag == SOR_APPLY_UPPER) { 2184 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2189 2190 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2191 if (flag & SOR_ZERO_INITIAL_GUESS) { 2192 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2193 its--; 2194 } 2195 2196 while (its--) { 2197 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2198 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2199 2200 /* update rhs: bb1 = bb - B*x */ 2201 PetscCall(VecScale(mat->lvec, -1.0)); 2202 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2203 2204 /* local sweep */ 2205 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2206 } 2207 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2208 if (flag & SOR_ZERO_INITIAL_GUESS) { 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2210 its--; 2211 } 2212 while (its--) { 2213 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2214 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 /* update rhs: bb1 = bb - B*x */ 2217 PetscCall(VecScale(mat->lvec, -1.0)); 2218 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2219 2220 /* local sweep */ 2221 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2222 } 2223 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2224 if (flag & SOR_ZERO_INITIAL_GUESS) { 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2226 its--; 2227 } 2228 while (its--) { 2229 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2230 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2231 2232 /* update rhs: bb1 = bb - B*x */ 2233 PetscCall(VecScale(mat->lvec, -1.0)); 2234 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2235 2236 /* local sweep */ 2237 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2238 } 2239 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2240 2241 PetscCall(VecDestroy(&bb1)); 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244 2245 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2246 { 2247 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2248 PetscInt m, N, i, *garray = aij->garray; 2249 PetscInt ib, jb, bs = A->rmap->bs; 2250 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2251 MatScalar *a_val = a_aij->a; 2252 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2253 MatScalar *b_val = b_aij->a; 2254 PetscReal *work; 2255 2256 PetscFunctionBegin; 2257 PetscCall(MatGetSize(A, &m, &N)); 2258 PetscCall(PetscCalloc1(N, &work)); 2259 if (type == NORM_2) { 2260 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2261 for (jb = 0; jb < bs; jb++) { 2262 for (ib = 0; ib < bs; ib++) { 2263 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2264 a_val++; 2265 } 2266 } 2267 } 2268 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2269 for (jb = 0; jb < bs; jb++) { 2270 for (ib = 0; ib < bs; ib++) { 2271 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2272 b_val++; 2273 } 2274 } 2275 } 2276 } else if (type == NORM_1) { 2277 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2278 for (jb = 0; jb < bs; jb++) { 2279 for (ib = 0; ib < bs; ib++) { 2280 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2281 a_val++; 2282 } 2283 } 2284 } 2285 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2286 for (jb = 0; jb < bs; jb++) { 2287 for (ib = 0; ib < bs; ib++) { 2288 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2289 b_val++; 2290 } 2291 } 2292 } 2293 } else if (type == NORM_INFINITY) { 2294 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2295 for (jb = 0; jb < bs; jb++) { 2296 for (ib = 0; ib < bs; ib++) { 2297 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2298 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2299 a_val++; 2300 } 2301 } 2302 } 2303 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2304 for (jb = 0; jb < bs; jb++) { 2305 for (ib = 0; ib < bs; ib++) { 2306 int col = garray[b_aij->j[i]] * bs + jb; 2307 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2313 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2314 for (jb = 0; jb < bs; jb++) { 2315 for (ib = 0; ib < bs; ib++) { 2316 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2317 a_val++; 2318 } 2319 } 2320 } 2321 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2322 for (jb = 0; jb < bs; jb++) { 2323 for (ib = 0; ib < bs; ib++) { 2324 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2325 b_val++; 2326 } 2327 } 2328 } 2329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2330 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2331 for (jb = 0; jb < bs; jb++) { 2332 for (ib = 0; ib < bs; ib++) { 2333 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2334 a_val++; 2335 } 2336 } 2337 } 2338 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2339 for (jb = 0; jb < bs; jb++) { 2340 for (ib = 0; ib < bs; ib++) { 2341 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2342 b_val++; 2343 } 2344 } 2345 } 2346 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2347 if (type == NORM_INFINITY) { 2348 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2349 } else { 2350 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2351 } 2352 PetscCall(PetscFree(work)); 2353 if (type == NORM_2) { 2354 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2355 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2356 for (i = 0; i < N; i++) reductions[i] /= m; 2357 } 2358 PetscFunctionReturn(PETSC_SUCCESS); 2359 } 2360 2361 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2362 { 2363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2364 2365 PetscFunctionBegin; 2366 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2367 A->factorerrortype = a->A->factorerrortype; 2368 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2369 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2374 { 2375 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2376 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2377 2378 PetscFunctionBegin; 2379 if (!Y->preallocated) { 2380 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2381 } else if (!aij->nz) { 2382 PetscInt nonew = aij->nonew; 2383 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2384 aij->nonew = nonew; 2385 } 2386 PetscCall(MatShift_Basic(Y, a)); 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2391 { 2392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2393 2394 PetscFunctionBegin; 2395 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2396 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2397 if (d) { 2398 PetscInt rstart; 2399 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2400 *d += rstart / A->rmap->bs; 2401 } 2402 PetscFunctionReturn(PETSC_SUCCESS); 2403 } 2404 2405 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2406 { 2407 PetscFunctionBegin; 2408 *a = ((Mat_MPIBAIJ *)A->data)->A; 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2413 { 2414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2415 2416 PetscFunctionBegin; 2417 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2418 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2423 MatGetRow_MPIBAIJ, 2424 MatRestoreRow_MPIBAIJ, 2425 MatMult_MPIBAIJ, 2426 /* 4*/ MatMultAdd_MPIBAIJ, 2427 MatMultTranspose_MPIBAIJ, 2428 MatMultTransposeAdd_MPIBAIJ, 2429 NULL, 2430 NULL, 2431 NULL, 2432 /*10*/ NULL, 2433 NULL, 2434 NULL, 2435 MatSOR_MPIBAIJ, 2436 MatTranspose_MPIBAIJ, 2437 /*15*/ MatGetInfo_MPIBAIJ, 2438 MatEqual_MPIBAIJ, 2439 MatGetDiagonal_MPIBAIJ, 2440 MatDiagonalScale_MPIBAIJ, 2441 MatNorm_MPIBAIJ, 2442 /*20*/ MatAssemblyBegin_MPIBAIJ, 2443 MatAssemblyEnd_MPIBAIJ, 2444 MatSetOption_MPIBAIJ, 2445 MatZeroEntries_MPIBAIJ, 2446 /*24*/ MatZeroRows_MPIBAIJ, 2447 NULL, 2448 NULL, 2449 NULL, 2450 NULL, 2451 /*29*/ MatSetUp_MPI_Hash, 2452 NULL, 2453 NULL, 2454 MatGetDiagonalBlock_MPIBAIJ, 2455 NULL, 2456 /*34*/ MatDuplicate_MPIBAIJ, 2457 NULL, 2458 NULL, 2459 NULL, 2460 NULL, 2461 /*39*/ MatAXPY_MPIBAIJ, 2462 MatCreateSubMatrices_MPIBAIJ, 2463 MatIncreaseOverlap_MPIBAIJ, 2464 MatGetValues_MPIBAIJ, 2465 MatCopy_MPIBAIJ, 2466 /*44*/ NULL, 2467 MatScale_MPIBAIJ, 2468 MatShift_MPIBAIJ, 2469 NULL, 2470 MatZeroRowsColumns_MPIBAIJ, 2471 /*49*/ NULL, 2472 NULL, 2473 NULL, 2474 NULL, 2475 NULL, 2476 /*54*/ MatFDColoringCreate_MPIXAIJ, 2477 NULL, 2478 MatSetUnfactored_MPIBAIJ, 2479 MatPermute_MPIBAIJ, 2480 MatSetValuesBlocked_MPIBAIJ, 2481 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2482 MatDestroy_MPIBAIJ, 2483 MatView_MPIBAIJ, 2484 NULL, 2485 NULL, 2486 /*64*/ NULL, 2487 NULL, 2488 NULL, 2489 NULL, 2490 NULL, 2491 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2492 NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*74*/ NULL, 2497 MatFDColoringApply_BAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*79*/ NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 MatLoad_MPIBAIJ, 2506 /*84*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 /*89*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*94*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*99*/ NULL, 2522 NULL, 2523 NULL, 2524 MatConjugate_MPIBAIJ, 2525 NULL, 2526 /*104*/ NULL, 2527 MatRealPart_MPIBAIJ, 2528 MatImaginaryPart_MPIBAIJ, 2529 NULL, 2530 NULL, 2531 /*109*/ NULL, 2532 NULL, 2533 NULL, 2534 NULL, 2535 MatMissingDiagonal_MPIBAIJ, 2536 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2537 NULL, 2538 MatGetGhosts_MPIBAIJ, 2539 NULL, 2540 NULL, 2541 /*119*/ NULL, 2542 NULL, 2543 NULL, 2544 NULL, 2545 MatGetMultiProcBlock_MPIBAIJ, 2546 /*124*/ NULL, 2547 MatGetColumnReductions_MPIBAIJ, 2548 MatInvertBlockDiagonal_MPIBAIJ, 2549 NULL, 2550 NULL, 2551 /*129*/ NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 /*134*/ NULL, 2557 NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 /*139*/ MatSetBlockSizes_Default, 2562 NULL, 2563 NULL, 2564 MatFDColoringSetUp_MPIXAIJ, 2565 NULL, 2566 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2567 NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*150*/ NULL, 2573 MatEliminateZeros_MPIBAIJ, 2574 MatGetRowSumAbs_MPIBAIJ, 2575 NULL, 2576 NULL, 2577 NULL}; 2578 2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2581 2582 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2583 { 2584 PetscInt m, rstart, cstart, cend; 2585 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2586 const PetscInt *JJ = NULL; 2587 PetscScalar *values = NULL; 2588 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2589 PetscBool nooffprocentries; 2590 2591 PetscFunctionBegin; 2592 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2593 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2594 PetscCall(PetscLayoutSetUp(B->rmap)); 2595 PetscCall(PetscLayoutSetUp(B->cmap)); 2596 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2597 m = B->rmap->n / bs; 2598 rstart = B->rmap->rstart / bs; 2599 cstart = B->cmap->rstart / bs; 2600 cend = B->cmap->rend / bs; 2601 2602 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2603 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2604 for (i = 0; i < m; i++) { 2605 nz = ii[i + 1] - ii[i]; 2606 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2607 nz_max = PetscMax(nz_max, nz); 2608 dlen = 0; 2609 olen = 0; 2610 JJ = jj + ii[i]; 2611 for (j = 0; j < nz; j++) { 2612 if (*JJ < cstart || *JJ >= cend) olen++; 2613 else dlen++; 2614 JJ++; 2615 } 2616 d_nnz[i] = dlen; 2617 o_nnz[i] = olen; 2618 } 2619 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2620 PetscCall(PetscFree2(d_nnz, o_nnz)); 2621 2622 values = (PetscScalar *)V; 2623 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2624 for (i = 0; i < m; i++) { 2625 PetscInt row = i + rstart; 2626 PetscInt ncols = ii[i + 1] - ii[i]; 2627 const PetscInt *icols = jj + ii[i]; 2628 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2629 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2630 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2631 } else { /* block ordering does not match so we can only insert one block at a time. */ 2632 PetscInt j; 2633 for (j = 0; j < ncols; j++) { 2634 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2635 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2636 } 2637 } 2638 } 2639 2640 if (!V) PetscCall(PetscFree(values)); 2641 nooffprocentries = B->nooffprocentries; 2642 B->nooffprocentries = PETSC_TRUE; 2643 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2644 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2645 B->nooffprocentries = nooffprocentries; 2646 2647 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 /*@C 2652 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2653 2654 Collective 2655 2656 Input Parameters: 2657 + B - the matrix 2658 . bs - the block size 2659 . i - the indices into `j` for the start of each local row (starts with zero) 2660 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2661 - v - optional values in the matrix, use `NULL` if not provided 2662 2663 Level: advanced 2664 2665 Notes: 2666 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2667 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2668 called this routine. 2669 2670 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2671 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2672 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2673 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2674 block column and the second index is over columns within a block. 2675 2676 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2677 2678 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2679 @*/ 2680 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2681 { 2682 PetscFunctionBegin; 2683 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2684 PetscValidType(B, 1); 2685 PetscValidLogicalCollectiveInt(B, bs, 2); 2686 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2691 { 2692 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2693 PetscInt i; 2694 PetscMPIInt size; 2695 2696 PetscFunctionBegin; 2697 if (B->hash_active) { 2698 B->ops[0] = b->cops; 2699 B->hash_active = PETSC_FALSE; 2700 } 2701 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2702 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2703 PetscCall(PetscLayoutSetUp(B->rmap)); 2704 PetscCall(PetscLayoutSetUp(B->cmap)); 2705 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2706 2707 if (d_nnz) { 2708 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2709 } 2710 if (o_nnz) { 2711 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2712 } 2713 2714 b->bs2 = bs * bs; 2715 b->mbs = B->rmap->n / bs; 2716 b->nbs = B->cmap->n / bs; 2717 b->Mbs = B->rmap->N / bs; 2718 b->Nbs = B->cmap->N / bs; 2719 2720 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2721 b->rstartbs = B->rmap->rstart / bs; 2722 b->rendbs = B->rmap->rend / bs; 2723 b->cstartbs = B->cmap->rstart / bs; 2724 b->cendbs = B->cmap->rend / bs; 2725 2726 #if defined(PETSC_USE_CTABLE) 2727 PetscCall(PetscHMapIDestroy(&b->colmap)); 2728 #else 2729 PetscCall(PetscFree(b->colmap)); 2730 #endif 2731 PetscCall(PetscFree(b->garray)); 2732 PetscCall(VecDestroy(&b->lvec)); 2733 PetscCall(VecScatterDestroy(&b->Mvctx)); 2734 2735 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2736 2737 MatSeqXAIJGetOptions_Private(b->B); 2738 PetscCall(MatDestroy(&b->B)); 2739 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2740 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2741 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2742 MatSeqXAIJRestoreOptions_Private(b->B); 2743 2744 MatSeqXAIJGetOptions_Private(b->A); 2745 PetscCall(MatDestroy(&b->A)); 2746 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2747 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2748 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2749 MatSeqXAIJRestoreOptions_Private(b->A); 2750 2751 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2752 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2753 B->preallocated = PETSC_TRUE; 2754 B->was_assembled = PETSC_FALSE; 2755 B->assembled = PETSC_FALSE; 2756 PetscFunctionReturn(PETSC_SUCCESS); 2757 } 2758 2759 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2760 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2761 2762 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2763 { 2764 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2765 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2766 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2767 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2768 2769 PetscFunctionBegin; 2770 PetscCall(PetscMalloc1(M + 1, &ii)); 2771 ii[0] = 0; 2772 for (i = 0; i < M; i++) { 2773 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2774 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2775 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2776 /* remove one from count of matrix has diagonal */ 2777 for (j = id[i]; j < id[i + 1]; j++) { 2778 if (jd[j] == i) { 2779 ii[i + 1]--; 2780 break; 2781 } 2782 } 2783 } 2784 PetscCall(PetscMalloc1(ii[M], &jj)); 2785 cnt = 0; 2786 for (i = 0; i < M; i++) { 2787 for (j = io[i]; j < io[i + 1]; j++) { 2788 if (garray[jo[j]] > rstart) break; 2789 jj[cnt++] = garray[jo[j]]; 2790 } 2791 for (k = id[i]; k < id[i + 1]; k++) { 2792 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2793 } 2794 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2795 } 2796 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2797 PetscFunctionReturn(PETSC_SUCCESS); 2798 } 2799 2800 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2801 2802 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2803 2804 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2805 { 2806 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2807 Mat_MPIAIJ *b; 2808 Mat B; 2809 2810 PetscFunctionBegin; 2811 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2812 2813 if (reuse == MAT_REUSE_MATRIX) { 2814 B = *newmat; 2815 } else { 2816 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2817 PetscCall(MatSetType(B, MATMPIAIJ)); 2818 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2819 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2820 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2821 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2822 } 2823 b = (Mat_MPIAIJ *)B->data; 2824 2825 if (reuse == MAT_REUSE_MATRIX) { 2826 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2827 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2828 } else { 2829 PetscInt *garray = a->garray; 2830 Mat_SeqAIJ *bB; 2831 PetscInt bs, nnz; 2832 PetscCall(MatDestroy(&b->A)); 2833 PetscCall(MatDestroy(&b->B)); 2834 /* just clear out the data structure */ 2835 PetscCall(MatDisAssemble_MPIAIJ(B)); 2836 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2837 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2838 2839 /* Global numbering for b->B columns */ 2840 bB = (Mat_SeqAIJ *)b->B->data; 2841 bs = A->rmap->bs; 2842 nnz = bB->i[A->rmap->n]; 2843 for (PetscInt k = 0; k < nnz; k++) { 2844 PetscInt bj = bB->j[k] / bs; 2845 PetscInt br = bB->j[k] % bs; 2846 bB->j[k] = garray[bj] * bs + br; 2847 } 2848 } 2849 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2850 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2851 2852 if (reuse == MAT_INPLACE_MATRIX) { 2853 PetscCall(MatHeaderReplace(A, &B)); 2854 } else { 2855 *newmat = B; 2856 } 2857 PetscFunctionReturn(PETSC_SUCCESS); 2858 } 2859 2860 /*MC 2861 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2862 2863 Options Database Keys: 2864 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2865 . -mat_block_size <bs> - set the blocksize used to store the matrix 2866 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2867 - -mat_use_hash_table <fact> - set hash table factor 2868 2869 Level: beginner 2870 2871 Note: 2872 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2873 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2874 2875 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2876 M*/ 2877 2878 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2879 2880 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2881 { 2882 Mat_MPIBAIJ *b; 2883 PetscBool flg = PETSC_FALSE; 2884 2885 PetscFunctionBegin; 2886 PetscCall(PetscNew(&b)); 2887 B->data = (void *)b; 2888 B->ops[0] = MatOps_Values; 2889 B->assembled = PETSC_FALSE; 2890 2891 B->insertmode = NOT_SET_VALUES; 2892 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2893 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2894 2895 /* build local table of row and column ownerships */ 2896 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2897 2898 /* build cache for off array entries formed */ 2899 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2900 2901 b->donotstash = PETSC_FALSE; 2902 b->colmap = NULL; 2903 b->garray = NULL; 2904 b->roworiented = PETSC_TRUE; 2905 2906 /* stuff used in block assembly */ 2907 b->barray = NULL; 2908 2909 /* stuff used for matrix vector multiply */ 2910 b->lvec = NULL; 2911 b->Mvctx = NULL; 2912 2913 /* stuff for MatGetRow() */ 2914 b->rowindices = NULL; 2915 b->rowvalues = NULL; 2916 b->getrowactive = PETSC_FALSE; 2917 2918 /* hash table stuff */ 2919 b->ht = NULL; 2920 b->hd = NULL; 2921 b->ht_size = 0; 2922 b->ht_flag = PETSC_FALSE; 2923 b->ht_fact = 0; 2924 b->ht_total_ct = 0; 2925 b->ht_insert_ct = 0; 2926 2927 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2928 b->ijonly = PETSC_FALSE; 2929 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2933 #if defined(PETSC_HAVE_HYPRE) 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2935 #endif 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2943 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2944 2945 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2946 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2947 if (flg) { 2948 PetscReal fact = 1.39; 2949 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2950 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2951 if (fact <= 1.0) fact = 1.39; 2952 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2953 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2954 } 2955 PetscOptionsEnd(); 2956 PetscFunctionReturn(PETSC_SUCCESS); 2957 } 2958 2959 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2960 /*MC 2961 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2962 2963 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2964 and `MATMPIBAIJ` otherwise. 2965 2966 Options Database Keys: 2967 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2968 2969 Level: beginner 2970 2971 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2972 M*/ 2973 2974 /*@ 2975 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2976 (block compressed row). 2977 2978 Collective 2979 2980 Input Parameters: 2981 + B - the matrix 2982 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2983 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2984 . d_nz - number of block nonzeros per block row in diagonal portion of local 2985 submatrix (same for all local rows) 2986 . d_nnz - array containing the number of block nonzeros in the various block rows 2987 of the in diagonal portion of the local (possibly different for each block 2988 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2989 set it even if it is zero. 2990 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2991 submatrix (same for all local rows). 2992 - o_nnz - array containing the number of nonzeros in the various block rows of the 2993 off-diagonal portion of the local submatrix (possibly different for 2994 each block row) or `NULL`. 2995 2996 If the *_nnz parameter is given then the *_nz parameter is ignored 2997 2998 Options Database Keys: 2999 + -mat_block_size - size of the blocks to use 3000 - -mat_use_hash_table <fact> - set hash table factor 3001 3002 Level: intermediate 3003 3004 Notes: 3005 For good matrix assembly performance 3006 the user should preallocate the matrix storage by setting the parameters 3007 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3008 performance can be increased by more than a factor of 50. 3009 3010 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3011 than it must be used on all processors that share the object for that argument. 3012 3013 Storage Information: 3014 For a square global matrix we define each processor's diagonal portion 3015 to be its local rows and the corresponding columns (a square submatrix); 3016 each processor's off-diagonal portion encompasses the remainder of the 3017 local matrix (a rectangular submatrix). 3018 3019 The user can specify preallocated storage for the diagonal part of 3020 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3021 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3022 memory allocation. Likewise, specify preallocated storage for the 3023 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3024 3025 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3026 the figure below we depict these three local rows and all columns (0-11). 3027 3028 .vb 3029 0 1 2 3 4 5 6 7 8 9 10 11 3030 -------------------------- 3031 row 3 |o o o d d d o o o o o o 3032 row 4 |o o o d d d o o o o o o 3033 row 5 |o o o d d d o o o o o o 3034 -------------------------- 3035 .ve 3036 3037 Thus, any entries in the d locations are stored in the d (diagonal) 3038 submatrix, and any entries in the o locations are stored in the 3039 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3040 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3041 3042 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3043 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3044 In general, for PDE problems in which most nonzeros are near the diagonal, 3045 one expects `d_nz` >> `o_nz`. 3046 3047 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3048 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3049 You can also run with the option `-info` and look for messages with the string 3050 malloc in them to see if additional memory allocation was needed. 3051 3052 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3053 @*/ 3054 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3055 { 3056 PetscFunctionBegin; 3057 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3058 PetscValidType(B, 1); 3059 PetscValidLogicalCollectiveInt(B, bs, 2); 3060 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3061 PetscFunctionReturn(PETSC_SUCCESS); 3062 } 3063 3064 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3065 /*@ 3066 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3067 (block compressed row). 3068 3069 Collective 3070 3071 Input Parameters: 3072 + comm - MPI communicator 3073 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3074 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3075 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3076 This value should be the same as the local size used in creating the 3077 y vector for the matrix-vector product y = Ax. 3078 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3079 This value should be the same as the local size used in creating the 3080 x vector for the matrix-vector product y = Ax. 3081 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3082 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3083 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3084 submatrix (same for all local rows) 3085 . d_nnz - array containing the number of nonzero blocks in the various block rows 3086 of the in diagonal portion of the local (possibly different for each block 3087 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3088 and set it even if it is zero. 3089 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3090 submatrix (same for all local rows). 3091 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3092 off-diagonal portion of the local submatrix (possibly different for 3093 each block row) or NULL. 3094 3095 Output Parameter: 3096 . A - the matrix 3097 3098 Options Database Keys: 3099 + -mat_block_size - size of the blocks to use 3100 - -mat_use_hash_table <fact> - set hash table factor 3101 3102 Level: intermediate 3103 3104 Notes: 3105 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3106 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3107 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3108 3109 For good matrix assembly performance 3110 the user should preallocate the matrix storage by setting the parameters 3111 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3112 performance can be increased by more than a factor of 50. 3113 3114 If the *_nnz parameter is given then the *_nz parameter is ignored 3115 3116 A nonzero block is any block that as 1 or more nonzeros in it 3117 3118 The user MUST specify either the local or global matrix dimensions 3119 (possibly both). 3120 3121 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3122 than it must be used on all processors that share the object for that argument. 3123 3124 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3125 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3126 3127 Storage Information: 3128 For a square global matrix we define each processor's diagonal portion 3129 to be its local rows and the corresponding columns (a square submatrix); 3130 each processor's off-diagonal portion encompasses the remainder of the 3131 local matrix (a rectangular submatrix). 3132 3133 The user can specify preallocated storage for the diagonal part of 3134 the local submatrix with either d_nz or d_nnz (not both). Set 3135 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3136 memory allocation. Likewise, specify preallocated storage for the 3137 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3138 3139 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3140 the figure below we depict these three local rows and all columns (0-11). 3141 3142 .vb 3143 0 1 2 3 4 5 6 7 8 9 10 11 3144 -------------------------- 3145 row 3 |o o o d d d o o o o o o 3146 row 4 |o o o d d d o o o o o o 3147 row 5 |o o o d d d o o o o o o 3148 -------------------------- 3149 .ve 3150 3151 Thus, any entries in the d locations are stored in the d (diagonal) 3152 submatrix, and any entries in the o locations are stored in the 3153 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3154 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3155 3156 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3157 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3158 In general, for PDE problems in which most nonzeros are near the diagonal, 3159 one expects `d_nz` >> `o_nz`. 3160 3161 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3162 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3163 @*/ 3164 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3165 { 3166 PetscMPIInt size; 3167 3168 PetscFunctionBegin; 3169 PetscCall(MatCreate(comm, A)); 3170 PetscCall(MatSetSizes(*A, m, n, M, N)); 3171 PetscCallMPI(MPI_Comm_size(comm, &size)); 3172 if (size > 1) { 3173 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3174 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3175 } else { 3176 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3177 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3178 } 3179 PetscFunctionReturn(PETSC_SUCCESS); 3180 } 3181 3182 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3183 { 3184 Mat mat; 3185 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3186 PetscInt len = 0; 3187 3188 PetscFunctionBegin; 3189 *newmat = NULL; 3190 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3191 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3192 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3193 3194 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3195 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3196 if (matin->hash_active) { 3197 PetscCall(MatSetUp(mat)); 3198 } else { 3199 mat->factortype = matin->factortype; 3200 mat->preallocated = PETSC_TRUE; 3201 mat->assembled = PETSC_TRUE; 3202 mat->insertmode = NOT_SET_VALUES; 3203 3204 a = (Mat_MPIBAIJ *)mat->data; 3205 mat->rmap->bs = matin->rmap->bs; 3206 a->bs2 = oldmat->bs2; 3207 a->mbs = oldmat->mbs; 3208 a->nbs = oldmat->nbs; 3209 a->Mbs = oldmat->Mbs; 3210 a->Nbs = oldmat->Nbs; 3211 3212 a->size = oldmat->size; 3213 a->rank = oldmat->rank; 3214 a->donotstash = oldmat->donotstash; 3215 a->roworiented = oldmat->roworiented; 3216 a->rowindices = NULL; 3217 a->rowvalues = NULL; 3218 a->getrowactive = PETSC_FALSE; 3219 a->barray = NULL; 3220 a->rstartbs = oldmat->rstartbs; 3221 a->rendbs = oldmat->rendbs; 3222 a->cstartbs = oldmat->cstartbs; 3223 a->cendbs = oldmat->cendbs; 3224 3225 /* hash table stuff */ 3226 a->ht = NULL; 3227 a->hd = NULL; 3228 a->ht_size = 0; 3229 a->ht_flag = oldmat->ht_flag; 3230 a->ht_fact = oldmat->ht_fact; 3231 a->ht_total_ct = 0; 3232 a->ht_insert_ct = 0; 3233 3234 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3235 if (oldmat->colmap) { 3236 #if defined(PETSC_USE_CTABLE) 3237 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3238 #else 3239 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3240 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3241 #endif 3242 } else a->colmap = NULL; 3243 3244 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3245 PetscCall(PetscMalloc1(len, &a->garray)); 3246 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3247 } else a->garray = NULL; 3248 3249 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3250 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3251 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3252 3253 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3254 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3255 } 3256 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3257 *newmat = mat; 3258 PetscFunctionReturn(PETSC_SUCCESS); 3259 } 3260 3261 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3262 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3263 { 3264 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3265 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3266 PetscScalar *matvals; 3267 3268 PetscFunctionBegin; 3269 PetscCall(PetscViewerSetUp(viewer)); 3270 3271 /* read in matrix header */ 3272 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3273 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3274 M = header[1]; 3275 N = header[2]; 3276 nz = header[3]; 3277 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3278 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3279 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3280 3281 /* set block sizes from the viewer's .info file */ 3282 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3283 /* set local sizes if not set already */ 3284 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3285 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3286 /* set global sizes if not set already */ 3287 if (mat->rmap->N < 0) mat->rmap->N = M; 3288 if (mat->cmap->N < 0) mat->cmap->N = N; 3289 PetscCall(PetscLayoutSetUp(mat->rmap)); 3290 PetscCall(PetscLayoutSetUp(mat->cmap)); 3291 3292 /* check if the matrix sizes are correct */ 3293 PetscCall(MatGetSize(mat, &rows, &cols)); 3294 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3295 PetscCall(MatGetBlockSize(mat, &bs)); 3296 PetscCall(MatGetLocalSize(mat, &m, &n)); 3297 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3298 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3299 mbs = m / bs; 3300 nbs = n / bs; 3301 3302 /* read in row lengths and build row indices */ 3303 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3304 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3305 rowidxs[0] = 0; 3306 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3307 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3308 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3309 3310 /* read in column indices and matrix values */ 3311 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3312 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3313 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3314 3315 { /* preallocate matrix storage */ 3316 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3317 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3318 PetscBool sbaij, done; 3319 PetscInt *d_nnz, *o_nnz; 3320 3321 PetscCall(PetscBTCreate(nbs, &bt)); 3322 PetscCall(PetscHSetICreate(&ht)); 3323 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3324 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3325 for (i = 0; i < mbs; i++) { 3326 PetscCall(PetscBTMemzero(nbs, bt)); 3327 PetscCall(PetscHSetIClear(ht)); 3328 for (k = 0; k < bs; k++) { 3329 PetscInt row = bs * i + k; 3330 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3331 PetscInt col = colidxs[j]; 3332 if (!sbaij || col >= row) { 3333 if (col >= cs && col < ce) { 3334 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3335 } else { 3336 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3337 if (done) o_nnz[i]++; 3338 } 3339 } 3340 } 3341 } 3342 } 3343 PetscCall(PetscBTDestroy(&bt)); 3344 PetscCall(PetscHSetIDestroy(&ht)); 3345 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3346 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3347 PetscCall(PetscFree2(d_nnz, o_nnz)); 3348 } 3349 3350 /* store matrix values */ 3351 for (i = 0; i < m; i++) { 3352 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3353 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3354 } 3355 3356 PetscCall(PetscFree(rowidxs)); 3357 PetscCall(PetscFree2(colidxs, matvals)); 3358 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3359 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3360 PetscFunctionReturn(PETSC_SUCCESS); 3361 } 3362 3363 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3364 { 3365 PetscBool isbinary; 3366 3367 PetscFunctionBegin; 3368 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3369 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3370 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3371 PetscFunctionReturn(PETSC_SUCCESS); 3372 } 3373 3374 /*@ 3375 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3376 3377 Input Parameters: 3378 + mat - the matrix 3379 - fact - factor 3380 3381 Options Database Key: 3382 . -mat_use_hash_table <fact> - provide the factor 3383 3384 Level: advanced 3385 3386 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3387 @*/ 3388 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3389 { 3390 PetscFunctionBegin; 3391 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3392 PetscFunctionReturn(PETSC_SUCCESS); 3393 } 3394 3395 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3396 { 3397 Mat_MPIBAIJ *baij; 3398 3399 PetscFunctionBegin; 3400 baij = (Mat_MPIBAIJ *)mat->data; 3401 baij->ht_fact = fact; 3402 PetscFunctionReturn(PETSC_SUCCESS); 3403 } 3404 3405 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3406 { 3407 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3408 PetscBool flg; 3409 3410 PetscFunctionBegin; 3411 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3412 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3413 if (Ad) *Ad = a->A; 3414 if (Ao) *Ao = a->B; 3415 if (colmap) *colmap = a->garray; 3416 PetscFunctionReturn(PETSC_SUCCESS); 3417 } 3418 3419 /* 3420 Special version for direct calls from Fortran (to eliminate two function call overheads 3421 */ 3422 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3423 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3424 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3425 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3426 #endif 3427 3428 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3429 /*@C 3430 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3431 3432 Collective 3433 3434 Input Parameters: 3435 + matin - the matrix 3436 . min - number of input rows 3437 . im - input rows 3438 . nin - number of input columns 3439 . in - input columns 3440 . v - numerical values input 3441 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3442 3443 Level: advanced 3444 3445 Developer Notes: 3446 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3447 3448 .seealso: `Mat`, `MatSetValuesBlocked()` 3449 @*/ 3450 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3451 { 3452 /* convert input arguments to C version */ 3453 Mat mat = *matin; 3454 PetscInt m = *min, n = *nin; 3455 InsertMode addv = *addvin; 3456 3457 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3458 const MatScalar *value; 3459 MatScalar *barray = baij->barray; 3460 PetscBool roworiented = baij->roworiented; 3461 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3462 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3463 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3464 3465 PetscFunctionBegin; 3466 /* tasks normally handled by MatSetValuesBlocked() */ 3467 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3468 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3469 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3470 if (mat->assembled) { 3471 mat->was_assembled = PETSC_TRUE; 3472 mat->assembled = PETSC_FALSE; 3473 } 3474 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3475 3476 if (!barray) { 3477 PetscCall(PetscMalloc1(bs2, &barray)); 3478 baij->barray = barray; 3479 } 3480 3481 if (roworiented) stepval = (n - 1) * bs; 3482 else stepval = (m - 1) * bs; 3483 3484 for (i = 0; i < m; i++) { 3485 if (im[i] < 0) continue; 3486 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3487 if (im[i] >= rstart && im[i] < rend) { 3488 row = im[i] - rstart; 3489 for (j = 0; j < n; j++) { 3490 /* If NumCol = 1 then a copy is not required */ 3491 if ((roworiented) && (n == 1)) { 3492 barray = (MatScalar *)v + i * bs2; 3493 } else if ((!roworiented) && (m == 1)) { 3494 barray = (MatScalar *)v + j * bs2; 3495 } else { /* Here a copy is required */ 3496 if (roworiented) { 3497 value = v + i * (stepval + bs) * bs + j * bs; 3498 } else { 3499 value = v + j * (stepval + bs) * bs + i * bs; 3500 } 3501 for (ii = 0; ii < bs; ii++, value += stepval) { 3502 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3503 } 3504 barray -= bs2; 3505 } 3506 3507 if (in[j] >= cstart && in[j] < cend) { 3508 col = in[j] - cstart; 3509 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3510 } else if (in[j] < 0) { 3511 continue; 3512 } else { 3513 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3514 if (mat->was_assembled) { 3515 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3516 3517 #if defined(PETSC_USE_DEBUG) 3518 #if defined(PETSC_USE_CTABLE) 3519 { 3520 PetscInt data; 3521 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3522 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3523 } 3524 #else 3525 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3526 #endif 3527 #endif 3528 #if defined(PETSC_USE_CTABLE) 3529 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3530 col = (col - 1) / bs; 3531 #else 3532 col = (baij->colmap[in[j]] - 1) / bs; 3533 #endif 3534 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3535 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3536 col = in[j]; 3537 } 3538 } else col = in[j]; 3539 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3540 } 3541 } 3542 } else { 3543 if (!baij->donotstash) { 3544 if (roworiented) { 3545 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3546 } else { 3547 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3548 } 3549 } 3550 } 3551 } 3552 3553 /* task normally handled by MatSetValuesBlocked() */ 3554 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3555 PetscFunctionReturn(PETSC_SUCCESS); 3556 } 3557 3558 /*@ 3559 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3560 3561 Collective 3562 3563 Input Parameters: 3564 + comm - MPI communicator 3565 . bs - the block size, only a block size of 1 is supported 3566 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3567 . n - This value should be the same as the local size used in creating the 3568 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3569 calculated if `N` is given) For square matrices `n` is almost always `m`. 3570 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3571 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3572 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3573 . j - column indices 3574 - a - matrix values 3575 3576 Output Parameter: 3577 . mat - the matrix 3578 3579 Level: intermediate 3580 3581 Notes: 3582 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3583 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3584 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3585 3586 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3587 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3588 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3589 with column-major ordering within blocks. 3590 3591 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3592 3593 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3594 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3595 @*/ 3596 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3597 { 3598 PetscFunctionBegin; 3599 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3600 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3601 PetscCall(MatCreate(comm, mat)); 3602 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3603 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3604 PetscCall(MatSetBlockSize(*mat, bs)); 3605 PetscCall(MatSetUp(*mat)); 3606 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3607 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3608 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3609 PetscFunctionReturn(PETSC_SUCCESS); 3610 } 3611 3612 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3613 { 3614 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3615 PetscInt *indx; 3616 PetscScalar *values; 3617 3618 PetscFunctionBegin; 3619 PetscCall(MatGetSize(inmat, &m, &N)); 3620 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3621 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3622 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3623 PetscInt *bindx, rmax = a->rmax, j; 3624 PetscMPIInt rank, size; 3625 3626 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3627 mbs = m / bs; 3628 Nbs = N / cbs; 3629 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3630 nbs = n / cbs; 3631 3632 PetscCall(PetscMalloc1(rmax, &bindx)); 3633 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3634 3635 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3636 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3637 if (rank == size - 1) { 3638 /* Check sum(nbs) = Nbs */ 3639 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3640 } 3641 3642 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3643 for (i = 0; i < mbs; i++) { 3644 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3645 nnz = nnz / bs; 3646 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3647 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3648 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3649 } 3650 PetscCall(PetscFree(bindx)); 3651 3652 PetscCall(MatCreate(comm, outmat)); 3653 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3654 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3655 PetscCall(MatSetType(*outmat, MATBAIJ)); 3656 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3657 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3658 MatPreallocateEnd(dnz, onz); 3659 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3660 } 3661 3662 /* numeric phase */ 3663 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3664 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3665 3666 for (i = 0; i < m; i++) { 3667 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3668 Ii = i + rstart; 3669 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3670 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3671 } 3672 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3673 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3674 PetscFunctionReturn(PETSC_SUCCESS); 3675 } 3676