1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", 
NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Saves a copy of the current numerical values of both local blocks; see MatStoreValues() */
static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the numerical values previously saved with MatStoreValues_MPIBAIJ() */
static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* values are stored shifted by one so that a lookup result of 0 means "not present" */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inserts/adds one point value into the diagonal block A; used only by MatSetValues_MPIBAIJ() */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    /* binary search narrows the range, a short linear scan finishes it */ \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

/* Inserts/adds one point value into the off-diagonal block B; used only by MatSetValues_MPIBAIJ() */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" \
    PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/*
   Point-wise MatSetValues() for MPIBAIJ: entries in locally owned rows go directly into the
   sequential diagonal (A) or off-diagonal (B) block; entries for other processes are stashed
   and communicated during assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* colmap translates global block columns to B's local (point) column, shifted by one */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* unknown off-diagonal block column: revert B to global numbering so it can grow */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inserts/adds one bs x bs dense block at block position (row,col) of a SeqBAIJ matrix */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) { /* binary search followed by a short linear scan */
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already present: update it in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* stored format is column-major within each block, hence the strided jj loop */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
   by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* barray is a persistent bs2-sized scratch block, allocated lazily on first use */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block row indices are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* unknown off-diagonal block column: revert B to global numbering so it can grow */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* multiplicative hash on the golden ratio; tmp is a caller-supplied PetscReal scratch */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   Point-wise MatSetValues() used after the hash table has been built (see
   MatCreateHashTable_MPIBAIJ_Private()); only locations already present in the
   table can be set, new nonzero locations are an error.
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1; /* 1-based so that 0 marks an empty slot */
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* same probe as below but additionally counts probes for the assembly statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) { /* wrap around to the front of the table */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) { /* linear probing with wrap-around */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Blocked MatSetValues() used after the hash table has been built; indices are block indices */
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block units (rstartbs) while rend is taken from
     rmap->rend (point units) -- looks inconsistent, confirm against upstream */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) { /* wrap around to the front of the table */
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) { /* linear probing with wrap-around */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value +=
stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieves values at the given (point) global indices; only locally owned rows are supported */
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) { /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else { /* off-diagonal block: translate via colmap (1-shifted, 0 == absent) */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; /* structural zero */
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes the Frobenius, 1- or infinity-norm of the parallel matrix */
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) { /* sequential case: defer entirely to the local block */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both local blocks, then reduce and take the square root */
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* probe statistics, reported below */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); /* already built */

  /* factor > 1 oversizes the table relative to the nonzero count to keep probing short */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1; /* 1-based so that 0 marks an empty slot */
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) { /* linear probing for a free slot */
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2; /* slot points at the stored block */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]]; /* global block column of the off-diagonal entry */
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begins communication of stashed off-process entries (point and block stashes) */
static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Completes assembly: drains both stashes, assembles A and B, and builds the optional hash table */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n,
&row, &col, &val, &flg)); 915 if (!flg) break; 916 917 for (i = 0; i < n;) { 918 /* Now identify the consecutive vals belonging to the same row */ 919 for (j = i, rstart = row[j]; j < n; j++) { 920 if (row[j] != rstart) break; 921 } 922 if (j < n) ncols = j - i; 923 else ncols = n - i; 924 /* Now assemble all these values with a single function call */ 925 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 926 i = j; 927 } 928 } 929 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 930 /* Now process the block-stash. Since the values are stashed column-oriented, 931 set the row-oriented flag to column-oriented, and after MatSetValues() 932 restore the original flags */ 933 r1 = baij->roworiented; 934 r2 = a->roworiented; 935 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 936 937 baij->roworiented = PETSC_FALSE; 938 a->roworiented = PETSC_FALSE; 939 (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; 940 while (1) { 941 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 942 if (!flg) break; 943 944 for (i = 0; i < n;) { 945 /* Now identify the consecutive vals belonging to the same row */ 946 for (j = i, rstart = row[j]; j < n; j++) { 947 if (row[j] != rstart) break; 948 } 949 if (j < n) ncols = j - i; 950 else ncols = n - i; 951 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 952 i = j; 953 } 954 } 955 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 956 957 baij->roworiented = r1; 958 a->roworiented = r2; 959 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 960 } 961 962 PetscCall(MatAssemblyBegin(baij->A, mode)); 963 PetscCall(MatAssemblyEnd(baij->A, mode)); 964 965 /* determine if any processor has disassembled, if so we must 966 also disassemble ourselves, in order that we may reassemble. 
*/ 967 /* 968 if nonzero structure of submatrix B cannot change then we know that 969 no processor disassembled thus we can skip this stuff 970 */ 971 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 972 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 973 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 974 } 975 976 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 977 PetscCall(MatAssemblyBegin(baij->B, mode)); 978 PetscCall(MatAssemblyEnd(baij->B, mode)); 979 980 #if defined(PETSC_USE_INFO) 981 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 982 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 983 984 baij->ht_total_ct = 0; 985 baij->ht_insert_ct = 0; 986 } 987 #endif 988 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 989 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 990 991 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 992 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 993 } 994 995 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 996 997 baij->rowvalues = NULL; 998 999 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1000 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 1001 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1002 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1003 } 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1008 #include <petscdraw.h> 1009 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1010 { 1011 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ *)mat->data; 1012 PetscMPIInt rank = baij->rank; 1013 PetscInt bs = mat->rmap->bs; 1014 PetscBool iascii, isdraw; 1015 PetscViewer sviewer; 1016 PetscViewerFormat format; 1017 1018 PetscFunctionBegin; 1019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1021 if (iascii) { 1022 PetscCall(PetscViewerGetFormat(viewer, &format)); 1023 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1024 MatInfo info; 1025 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1026 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1027 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1028 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1029 mat->rmap->bs, (double)info.memory)); 1030 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1032 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1033 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1034 PetscCall(PetscViewerFlush(viewer)); 1035 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1036 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1037 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1038 PetscFunctionReturn(PETSC_SUCCESS); 1039 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1040 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 
1045 } 1046 1047 if (isdraw) { 1048 PetscDraw draw; 1049 PetscBool isnull; 1050 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1051 PetscCall(PetscDrawIsNull(draw, &isnull)); 1052 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 { 1056 /* assemble the entire matrix onto first processor. */ 1057 Mat A; 1058 Mat_SeqBAIJ *Aloc; 1059 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1060 MatScalar *a; 1061 const char *matname; 1062 1063 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1064 /* Perhaps this should be the type of mat? */ 1065 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1066 if (rank == 0) { 1067 PetscCall(MatSetSizes(A, M, N, M, N)); 1068 } else { 1069 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1070 } 1071 PetscCall(MatSetType(A, MATMPIBAIJ)); 1072 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1073 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1074 1075 /* copy over the A part */ 1076 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1077 ai = Aloc->i; 1078 aj = Aloc->j; 1079 a = Aloc->a; 1080 PetscCall(PetscMalloc1(bs, &rvals)); 1081 1082 for (i = 0; i < mbs; i++) { 1083 rvals[0] = bs * (baij->rstartbs + i); 1084 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1085 for (j = ai[i]; j < ai[i + 1]; j++) { 1086 col = (baij->cstartbs + aj[j]) * bs; 1087 for (k = 0; k < bs; k++) { 1088 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1089 col++; 1090 a += bs; 1091 } 1092 } 1093 } 1094 /* copy over the B part */ 1095 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1096 ai = Aloc->i; 1097 aj = Aloc->j; 1098 a = Aloc->a; 1099 for (i = 0; i < mbs; i++) { 1100 rvals[0] = bs * (baij->rstartbs + i); 1101 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1102 for (j = ai[i]; j < ai[i + 1]; j++) { 1103 col = baij->garray[aj[j]] * bs; 1104 for (k = 0; k < bs; k++) { 1105 
PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1106 col++; 1107 a += bs; 1108 } 1109 } 1110 } 1111 PetscCall(PetscFree(rvals)); 1112 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1113 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1114 /* 1115 Everyone has to call to draw the matrix since the graphics waits are 1116 synchronized across all processors that share the PetscDraw object 1117 */ 1118 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1119 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1120 if (rank == 0) { 1121 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname)); 1122 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer)); 1123 } 1124 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1125 PetscCall(MatDestroy(&A)); 1126 } 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1131 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1132 { 1133 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1134 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1135 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1136 const PetscInt *garray = aij->garray; 1137 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1138 PetscInt64 nz, hnz; 1139 PetscInt *rowlens, *colidxs; 1140 PetscScalar *matvals; 1141 PetscMPIInt rank; 1142 1143 PetscFunctionBegin; 1144 PetscCall(PetscViewerSetUp(viewer)); 1145 1146 M = mat->rmap->N; 1147 N = mat->cmap->N; 1148 m = mat->rmap->n; 1149 rs = mat->rmap->rstart; 1150 cs = mat->cmap->rstart; 1151 bs = mat->rmap->bs; 1152 nz = bs * bs * (A->nz + B->nz); 1153 1154 /* write matrix header */ 1155 header[0] = MAT_FILE_CLASSID; 1156 header[1] = M; 1157 header[2] = N; 1158 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1159 
PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1160 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1161 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1162 1163 /* fill in and store row lengths */ 1164 PetscCall(PetscMalloc1(m, &rowlens)); 1165 for (cnt = 0, i = 0; i < A->mbs; i++) 1166 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1167 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1168 PetscCall(PetscFree(rowlens)); 1169 1170 /* fill in and store column indices */ 1171 PetscCall(PetscMalloc1(nz, &colidxs)); 1172 for (cnt = 0, i = 0; i < A->mbs; i++) { 1173 for (k = 0; k < bs; k++) { 1174 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1175 if (garray[B->j[jb]] > cs / bs) break; 1176 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1177 } 1178 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1179 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1180 for (; jb < B->i[i + 1]; jb++) 1181 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1182 } 1183 } 1184 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1185 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1186 PetscCall(PetscFree(colidxs)); 1187 1188 /* fill in and store nonzero values */ 1189 PetscCall(PetscMalloc1(nz, &matvals)); 1190 for (cnt = 0, i = 0; i < A->mbs; i++) { 1191 for (k = 0; k < bs; k++) { 1192 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1193 if (garray[B->j[jb]] > cs / bs) break; 1194 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1195 } 1196 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1197 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1198 for (; jb < B->i[i + 1]; jb++) 1199 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) 
+ k]; 1200 } 1201 } 1202 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1203 PetscCall(PetscFree(matvals)); 1204 1205 /* write block size option to the viewer's .info file */ 1206 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1207 PetscFunctionReturn(PETSC_SUCCESS); 1208 } 1209 1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1211 { 1212 PetscBool iascii, isdraw, issocket, isbinary; 1213 1214 PetscFunctionBegin; 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1219 if (iascii || isdraw || issocket) { 1220 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1221 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1222 PetscFunctionReturn(PETSC_SUCCESS); 1223 } 1224 1225 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1226 { 1227 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1228 PetscInt nt; 1229 1230 PetscFunctionBegin; 1231 PetscCall(VecGetLocalSize(xx, &nt)); 1232 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1233 PetscCall(VecGetLocalSize(yy, &nt)); 1234 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1235 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1237 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1238 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1243 { 1244 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *)A->data; 1245 1246 PetscFunctionBegin; 1247 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1249 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1250 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1255 { 1256 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1257 1258 PetscFunctionBegin; 1259 /* do nondiagonal part */ 1260 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1261 /* do local part */ 1262 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1263 /* add partial results together */ 1264 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1265 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1270 { 1271 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1272 1273 PetscFunctionBegin; 1274 /* do nondiagonal part */ 1275 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1276 /* do local part */ 1277 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1278 /* add partial results together */ 1279 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1280 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1281 PetscFunctionReturn(PETSC_SUCCESS); 1282 } 1283 1284 /* 1285 This only works correctly for square matrices where the subblock A->A is the 1286 diagonal block 1287 */ 1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1289 { 1290 PetscFunctionBegin; 1291 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1292 
PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1293 PetscFunctionReturn(PETSC_SUCCESS); 1294 } 1295 1296 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1297 { 1298 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1299 1300 PetscFunctionBegin; 1301 PetscCall(MatScale(a->A, aa)); 1302 PetscCall(MatScale(a->B, aa)); 1303 PetscFunctionReturn(PETSC_SUCCESS); 1304 } 1305 1306 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1307 { 1308 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1309 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1310 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1311 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1312 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1313 1314 PetscFunctionBegin; 1315 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1316 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1317 mat->getrowactive = PETSC_TRUE; 1318 1319 if (!mat->rowvalues && (idx || v)) { 1320 /* 1321 allocate enough space to hold information from the longest row. 
1322 */ 1323 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1324 PetscInt max = 1, mbs = mat->mbs, tmp; 1325 for (i = 0; i < mbs; i++) { 1326 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1327 if (max < tmp) max = tmp; 1328 } 1329 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1330 } 1331 lrow = row - brstart; 1332 1333 pvA = &vworkA; 1334 pcA = &cworkA; 1335 pvB = &vworkB; 1336 pcB = &cworkB; 1337 if (!v) { 1338 pvA = NULL; 1339 pvB = NULL; 1340 } 1341 if (!idx) { 1342 pcA = NULL; 1343 if (!v) pcB = NULL; 1344 } 1345 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1346 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1347 nztot = nzA + nzB; 1348 1349 cmap = mat->garray; 1350 if (v || idx) { 1351 if (nztot) { 1352 /* Sort by increasing column numbers, assuming A and B already sorted */ 1353 PetscInt imark = -1; 1354 if (v) { 1355 *v = v_p = mat->rowvalues; 1356 for (i = 0; i < nzB; i++) { 1357 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1358 else break; 1359 } 1360 imark = i; 1361 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1362 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1363 } 1364 if (idx) { 1365 *idx = idx_p = mat->rowindices; 1366 if (imark > -1) { 1367 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1368 } else { 1369 for (i = 0; i < nzB; i++) { 1370 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1371 else break; 1372 } 1373 imark = i; 1374 } 1375 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1376 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1377 } 1378 } else { 1379 if (idx) *idx = NULL; 1380 if (v) *v = NULL; 1381 } 1382 } 1383 *nz = nztot; 1384 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1385 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1390 { 1391 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1392 1393 PetscFunctionBegin; 1394 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1395 baij->getrowactive = PETSC_FALSE; 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1400 { 1401 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1402 1403 PetscFunctionBegin; 1404 PetscCall(MatZeroEntries(l->A)); 1405 PetscCall(MatZeroEntries(l->B)); 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1410 { 1411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1412 Mat A = a->A, B = a->B; 1413 PetscLogDouble isend[5], irecv[5]; 1414 1415 PetscFunctionBegin; 1416 info->block_size = (PetscReal)matin->rmap->bs; 1417 1418 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1419 1420 isend[0] = info->nz_used; 1421 isend[1] = info->nz_allocated; 1422 isend[2] = info->nz_unneeded; 1423 isend[3] = info->memory; 1424 isend[4] = info->mallocs; 1425 1426 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1427 1428 isend[0] += info->nz_used; 1429 isend[1] += info->nz_allocated; 1430 isend[2] += info->nz_unneeded; 1431 isend[3] += info->memory; 1432 isend[4] += info->mallocs; 1433 1434 if (flag == MAT_LOCAL) { 1435 info->nz_used = isend[0]; 1436 info->nz_allocated = isend[1]; 1437 info->nz_unneeded = isend[2]; 1438 info->memory = isend[3]; 1439 info->mallocs = isend[4]; 1440 } else if (flag == MAT_GLOBAL_MAX) { 1441 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1442 1443 info->nz_used = irecv[0]; 1444 info->nz_allocated = irecv[1]; 1445 info->nz_unneeded = irecv[2]; 1446 info->memory = irecv[3]; 1447 info->mallocs = 
irecv[4]; 1448 } else if (flag == MAT_GLOBAL_SUM) { 1449 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1450 1451 info->nz_used = irecv[0]; 1452 info->nz_allocated = irecv[1]; 1453 info->nz_unneeded = irecv[2]; 1454 info->memory = irecv[3]; 1455 info->mallocs = irecv[4]; 1456 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1457 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1458 info->fill_ratio_needed = 0; 1459 info->factor_mallocs = 0; 1460 PetscFunctionReturn(PETSC_SUCCESS); 1461 } 1462 1463 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1464 { 1465 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1466 1467 PetscFunctionBegin; 1468 switch (op) { 1469 case MAT_NEW_NONZERO_LOCATIONS: 1470 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1471 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1472 case MAT_KEEP_NONZERO_PATTERN: 1473 case MAT_NEW_NONZERO_LOCATION_ERR: 1474 MatCheckPreallocated(A, 1); 1475 PetscCall(MatSetOption(a->A, op, flg)); 1476 PetscCall(MatSetOption(a->B, op, flg)); 1477 break; 1478 case MAT_ROW_ORIENTED: 1479 MatCheckPreallocated(A, 1); 1480 a->roworiented = flg; 1481 1482 PetscCall(MatSetOption(a->A, op, flg)); 1483 PetscCall(MatSetOption(a->B, op, flg)); 1484 break; 1485 case MAT_FORCE_DIAGONAL_ENTRIES: 1486 case MAT_SORTED_FULL: 1487 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1488 break; 1489 case MAT_IGNORE_OFF_PROC_ENTRIES: 1490 a->donotstash = flg; 1491 break; 1492 case MAT_USE_HASH_TABLE: 1493 a->ht_flag = flg; 1494 a->ht_fact = 1.39; 1495 break; 1496 case MAT_SYMMETRIC: 1497 case MAT_STRUCTURALLY_SYMMETRIC: 1498 case MAT_HERMITIAN: 1499 case MAT_SUBMAT_SINGLEIS: 1500 case MAT_SYMMETRY_ETERNAL: 1501 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1502 case MAT_SPD_ETERNAL: 1503 /* if the diagonal matrix is square it inherits some of the properties above */ 1504 break; 1505 
default: 1506 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1507 } 1508 PetscFunctionReturn(PETSC_SUCCESS); 1509 } 1510 1511 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1512 { 1513 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1514 Mat_SeqBAIJ *Aloc; 1515 Mat B; 1516 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1517 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1518 MatScalar *a; 1519 1520 PetscFunctionBegin; 1521 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1522 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1523 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1524 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1525 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1526 /* Do not know preallocation information, but must set block size */ 1527 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1528 } else { 1529 B = *matout; 1530 } 1531 1532 /* copy over the A part */ 1533 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1534 ai = Aloc->i; 1535 aj = Aloc->j; 1536 a = Aloc->a; 1537 PetscCall(PetscMalloc1(bs, &rvals)); 1538 1539 for (i = 0; i < mbs; i++) { 1540 rvals[0] = bs * (baij->rstartbs + i); 1541 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1542 for (j = ai[i]; j < ai[i + 1]; j++) { 1543 col = (baij->cstartbs + aj[j]) * bs; 1544 for (k = 0; k < bs; k++) { 1545 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1546 1547 col++; 1548 a += bs; 1549 } 1550 } 1551 } 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1554 ai = Aloc->i; 1555 aj = Aloc->j; 1556 a = Aloc->a; 1557 for (i = 0; i < mbs; i++) { 1558 rvals[0] = bs * (baij->rstartbs + i); 1559 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1560 for (j = ai[i]; j < ai[i + 1]; j++) { 1561 col = baij->garray[aj[j]] * bs; 1562 for (k = 
0; k < bs; k++) { 1563 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1564 col++; 1565 a += bs; 1566 } 1567 } 1568 } 1569 PetscCall(PetscFree(rvals)); 1570 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1572 1573 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1574 else PetscCall(MatHeaderMerge(A, &B)); 1575 PetscFunctionReturn(PETSC_SUCCESS); 1576 } 1577 1578 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1579 { 1580 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1581 Mat a = baij->A, b = baij->B; 1582 PetscInt s1, s2, s3; 1583 1584 PetscFunctionBegin; 1585 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1586 if (rr) { 1587 PetscCall(VecGetLocalSize(rr, &s1)); 1588 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1589 /* Overlap communication with computation. */ 1590 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1591 } 1592 if (ll) { 1593 PetscCall(VecGetLocalSize(ll, &s1)); 1594 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1595 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1596 } 1597 /* scale the diagonal block */ 1598 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1599 1600 if (rr) { 1601 /* Do a scatter end and then right scale the off-diagonal block */ 1602 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1603 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1604 } 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1609 { 1610 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1611 PetscInt *lrows; 1612 PetscInt r, len; 1613 PetscBool cong; 1614 1615 PetscFunctionBegin; 1616 /* get locally owned rows */ 1617 
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1618 /* fix right hand side if needed */ 1619 if (x && b) { 1620 const PetscScalar *xx; 1621 PetscScalar *bb; 1622 1623 PetscCall(VecGetArrayRead(x, &xx)); 1624 PetscCall(VecGetArray(b, &bb)); 1625 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1626 PetscCall(VecRestoreArrayRead(x, &xx)); 1627 PetscCall(VecRestoreArray(b, &bb)); 1628 } 1629 1630 /* actually zap the local rows */ 1631 /* 1632 Zero the required rows. If the "diagonal block" of the matrix 1633 is square and the user wishes to set the diagonal we use separate 1634 code so that MatSetValues() is not called for each diagonal allocating 1635 new memory, thus calling lots of mallocs and slowing things down. 1636 1637 */ 1638 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1639 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1640 PetscCall(MatHasCongruentLayouts(A, &cong)); 1641 if ((diag != 0.0) && cong) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1643 } else if (diag != 0.0) { 1644 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1645 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1646 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1647 for (r = 0; r < len; ++r) { 1648 const PetscInt row = lrows[r] + A->rmap->rstart; 1649 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1650 } 1651 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1653 } else { 1654 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1655 } 1656 PetscCall(PetscFree(lrows)); 1657 1658 /* only change matrix nonzero state if pattern was allowed to be changed */ 1659 if (!((Mat_SeqBAIJ 
*)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) { 1660 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1661 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1662 } 1663 PetscFunctionReturn(PETSC_SUCCESS); 1664 } 1665 1666 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1667 { 1668 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1669 PetscMPIInt n = A->rmap->n, p = 0; 1670 PetscInt i, j, k, r, len = 0, row, col, count; 1671 PetscInt *lrows, *owners = A->rmap->range; 1672 PetscSFNode *rrows; 1673 PetscSF sf; 1674 const PetscScalar *xx; 1675 PetscScalar *bb, *mask; 1676 Vec xmask, lmask; 1677 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1678 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1679 PetscScalar *aa; 1680 1681 PetscFunctionBegin; 1682 /* Create SF where leaves are input rows and roots are owned rows */ 1683 PetscCall(PetscMalloc1(n, &lrows)); 1684 for (r = 0; r < n; ++r) lrows[r] = -1; 1685 PetscCall(PetscMalloc1(N, &rrows)); 1686 for (r = 0; r < N; ++r) { 1687 const PetscInt idx = rows[r]; 1688 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1689 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1690 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1691 } 1692 rrows[r].rank = p; 1693 rrows[r].index = rows[r] - owners[p]; 1694 } 1695 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1696 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1697 /* Collect flags for rows to be zeroed */ 1698 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1699 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1700 
PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers (lrows[] was filled with markers >= 0 for rows owned here) */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix: build a mask vector with 1 in every zeroed row,
     then scatter it so each process knows which ghost columns must be eliminated */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* bring ghost values of x local so eliminated columns can be folded into b */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix: walk the bs-strided scalar entries of each block row */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs; /* column-major blocks: consecutive entries of one scalar row are bs apart */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move known value to the right-hand side */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Mark the matrix as not factored; only the diagonal block carries factorization state */
static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/* Collective equality test: compare diagonal and off-diagonal blocks locally, then AND across ranks */
static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg)); /* short-circuit: skip off-diagonal compare if diagonal already differs */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B, using the fast per-block path when the nonzero patterns match */
static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute per-row nonzero counts for preallocating Y+X when patterns differ; delegates to the shared MPIX helper */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y. Uses BLAS axpy directly on the value arrays when the patterns are identical */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* patterns differ: build a correctly preallocated result matrix B, then merge it into Y */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* B's guts replace Y's; B itself is consumed */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all entries; no-op in real builds */
static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace all entries by their real parts (applied to both local blocks) */
static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace all entries by their imaginary parts (applied to both local blocks) */
static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a parallel submatrix; the gathered column IS is cached on the result for MAT_REUSE_MATRIX calls */
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. It does not report
     errors and might change the indices, hence potentially buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new)); /* shared IS: bump refcount so both destroys below are valid */
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* from here on, m and n are in block units */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only the call matching M's actual type takes effect; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
      The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permute rows and columns of A; implemented as a submatrix extraction with permutation index sets */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
} else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the ghost (off-process) block columns of this process: count and global block indices */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build, on every process, a sequential AIJ matrix holding the block nonzero structure
   of the entire parallel matrix (values are not set, only the pattern) */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* single allocation holds both arrays */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over, writing directly into B's j array
     at the offset corresponding to this rank's rows */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into usable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel SOR: only the "local" sweep variants are supported; each iteration scatters
   ghost values of xx, folds the off-diagonal contribution into the rhs, then runs the
   sequential SOR on the diagonal block */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (modified rhs) is only needed when more than one outer iteration runs */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag &
SOR_LOCAL_FORWARD_SWEEP) { 2211 if (flag & SOR_ZERO_INITIAL_GUESS) { 2212 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2213 its--; 2214 } 2215 while (its--) { 2216 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2217 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2218 2219 /* update rhs: bb1 = bb - B*x */ 2220 PetscCall(VecScale(mat->lvec, -1.0)); 2221 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2222 2223 /* local sweep */ 2224 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2225 } 2226 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2227 if (flag & SOR_ZERO_INITIAL_GUESS) { 2228 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2229 its--; 2230 } 2231 while (its--) { 2232 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2233 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2234 2235 /* update rhs: bb1 = bb - B*x */ 2236 PetscCall(VecScale(mat->lvec, -1.0)); 2237 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2238 2239 /* local sweep */ 2240 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2241 } 2242 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2243 2244 PetscCall(VecDestroy(&bb1)); 2245 PetscFunctionReturn(PETSC_SUCCESS); 2246 } 2247 2248 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2249 { 2250 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2251 PetscInt m, N, i, *garray = aij->garray; 2252 PetscInt ib, jb, bs = A->rmap->bs; 2253 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2254 MatScalar *a_val = a_aij->a; 2255 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2256 MatScalar *b_val = b_aij->a; 2257 PetscReal 
*work; 2258 2259 PetscFunctionBegin; 2260 PetscCall(MatGetSize(A, &m, &N)); 2261 PetscCall(PetscCalloc1(N, &work)); 2262 if (type == NORM_2) { 2263 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2264 for (jb = 0; jb < bs; jb++) { 2265 for (ib = 0; ib < bs; ib++) { 2266 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2267 a_val++; 2268 } 2269 } 2270 } 2271 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2272 for (jb = 0; jb < bs; jb++) { 2273 for (ib = 0; ib < bs; ib++) { 2274 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2275 b_val++; 2276 } 2277 } 2278 } 2279 } else if (type == NORM_1) { 2280 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2281 for (jb = 0; jb < bs; jb++) { 2282 for (ib = 0; ib < bs; ib++) { 2283 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2284 a_val++; 2285 } 2286 } 2287 } 2288 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2289 for (jb = 0; jb < bs; jb++) { 2290 for (ib = 0; ib < bs; ib++) { 2291 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2292 b_val++; 2293 } 2294 } 2295 } 2296 } else if (type == NORM_INFINITY) { 2297 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2298 for (jb = 0; jb < bs; jb++) { 2299 for (ib = 0; ib < bs; ib++) { 2300 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2301 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2302 a_val++; 2303 } 2304 } 2305 } 2306 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2307 for (jb = 0; jb < bs; jb++) { 2308 for (ib = 0; ib < bs; ib++) { 2309 int col = garray[b_aij->j[i]] * bs + jb; 2310 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2311 b_val++; 2312 } 2313 } 2314 } 2315 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2316 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2317 for (jb = 0; jb < bs; jb++) { 2318 
for (ib = 0; ib < bs; ib++) { 2319 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2320 a_val++; 2321 } 2322 } 2323 } 2324 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2325 for (jb = 0; jb < bs; jb++) { 2326 for (ib = 0; ib < bs; ib++) { 2327 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2328 b_val++; 2329 } 2330 } 2331 } 2332 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2333 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2334 for (jb = 0; jb < bs; jb++) { 2335 for (ib = 0; ib < bs; ib++) { 2336 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2337 a_val++; 2338 } 2339 } 2340 } 2341 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2342 for (jb = 0; jb < bs; jb++) { 2343 for (ib = 0; ib < bs; ib++) { 2344 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2345 b_val++; 2346 } 2347 } 2348 } 2349 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2350 if (type == NORM_INFINITY) { 2351 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2352 } else { 2353 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2354 } 2355 PetscCall(PetscFree(work)); 2356 if (type == NORM_2) { 2357 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2358 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2359 for (i = 0; i < N; i++) reductions[i] /= m; 2360 } 2361 PetscFunctionReturn(PETSC_SUCCESS); 2362 } 2363 2364 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2365 { 2366 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2367 2368 PetscFunctionBegin; 2369 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2370 A->factorerrortype = a->A->factorerrortype; 2371 
A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I. If Y is empty or unpreallocated, first ensure the diagonal block has room for
   one block per row without permanently changing the nonew (no-new-nonzeros) setting */
static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew; /* preserve setting across re-preallocation */
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Check the diagonal block for missing diagonal entries; d (if requested) is shifted to global block numbering */
static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs; /* local block row -> global block row */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (not copy) the local diagonal block */
static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop explicitly stored zero blocks from both local parts */
static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIBAIJ; slot positions must match struct _MatOps exactly */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       NULL};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Set the nonzero pattern (and optionally the values) of B from block-compressed CSR input.
   ii/jj are in block-row / block-column units; V (if given) holds bs*bs values per block */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes below are in block units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m,
&d_nnz, m, &o_nnz));
  /* count diagonal vs off-diagonal blocks per block row for exact preallocation */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); /* zero scratch row when caller gave no values */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries set here are local; temporarily disable off-process handling for a cheaper assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B - the matrix
. bs - the block size
. i - the indices into `j` for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero) these must be sorted for each row
- v - optional values in the matrix

  Level: advanced

  Notes:
  The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
  may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
  over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
  `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
  block column and the second index is over columns within a block.

  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation registered as a composed function */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific preallocation for MATMPIBAIJ (continues beyond this excerpt) */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leaving hash-based insertion mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }
2709 2710 b->bs2 = bs * bs; 2711 b->mbs = B->rmap->n / bs; 2712 b->nbs = B->cmap->n / bs; 2713 b->Mbs = B->rmap->N / bs; 2714 b->Nbs = B->cmap->N / bs; 2715 2716 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2717 b->rstartbs = B->rmap->rstart / bs; 2718 b->rendbs = B->rmap->rend / bs; 2719 b->cstartbs = B->cmap->rstart / bs; 2720 b->cendbs = B->cmap->rend / bs; 2721 2722 #if defined(PETSC_USE_CTABLE) 2723 PetscCall(PetscHMapIDestroy(&b->colmap)); 2724 #else 2725 PetscCall(PetscFree(b->colmap)); 2726 #endif 2727 PetscCall(PetscFree(b->garray)); 2728 PetscCall(VecDestroy(&b->lvec)); 2729 PetscCall(VecScatterDestroy(&b->Mvctx)); 2730 2731 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2732 2733 MatSeqXAIJGetOptions_Private(b->B); 2734 PetscCall(MatDestroy(&b->B)); 2735 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2736 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2737 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2738 MatSeqXAIJRestoreOptions_Private(b->B); 2739 2740 MatSeqXAIJGetOptions_Private(b->A); 2741 PetscCall(MatDestroy(&b->A)); 2742 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2743 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2744 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2745 MatSeqXAIJRestoreOptions_Private(b->A); 2746 2747 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2748 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2749 B->preallocated = PETSC_TRUE; 2750 B->was_assembled = PETSC_FALSE; 2751 B->assembled = PETSC_FALSE; 2752 PetscFunctionReturn(PETSC_SUCCESS); 2753 } 2754 2755 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2756 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2757 2758 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2759 { 2760 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2761 
Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2762 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2763 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2764 2765 PetscFunctionBegin; 2766 PetscCall(PetscMalloc1(M + 1, &ii)); 2767 ii[0] = 0; 2768 for (i = 0; i < M; i++) { 2769 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2770 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2771 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2772 /* remove one from count of matrix has diagonal */ 2773 for (j = id[i]; j < id[i + 1]; j++) { 2774 if (jd[j] == i) { 2775 ii[i + 1]--; 2776 break; 2777 } 2778 } 2779 } 2780 PetscCall(PetscMalloc1(ii[M], &jj)); 2781 cnt = 0; 2782 for (i = 0; i < M; i++) { 2783 for (j = io[i]; j < io[i + 1]; j++) { 2784 if (garray[jo[j]] > rstart) break; 2785 jj[cnt++] = garray[jo[j]]; 2786 } 2787 for (k = id[i]; k < id[i + 1]; k++) { 2788 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2789 } 2790 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2791 } 2792 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2793 PetscFunctionReturn(PETSC_SUCCESS); 2794 } 2795 2796 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2797 2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2799 2800 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2801 { 2802 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2803 Mat_MPIAIJ *b; 2804 Mat B; 2805 2806 PetscFunctionBegin; 2807 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2808 2809 if 
(reuse == MAT_REUSE_MATRIX) { 2810 B = *newmat; 2811 } else { 2812 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2813 PetscCall(MatSetType(B, MATMPIAIJ)); 2814 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2815 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2816 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2817 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2818 } 2819 b = (Mat_MPIAIJ *)B->data; 2820 2821 if (reuse == MAT_REUSE_MATRIX) { 2822 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2823 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2824 } else { 2825 PetscInt *garray = a->garray; 2826 Mat_SeqAIJ *bB; 2827 PetscInt bs, nnz; 2828 PetscCall(MatDestroy(&b->A)); 2829 PetscCall(MatDestroy(&b->B)); 2830 /* just clear out the data structure */ 2831 PetscCall(MatDisAssemble_MPIAIJ(B)); 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2834 2835 /* Global numbering for b->B columns */ 2836 bB = (Mat_SeqAIJ *)b->B->data; 2837 bs = A->rmap->bs; 2838 nnz = bB->i[A->rmap->n]; 2839 for (PetscInt k = 0; k < nnz; k++) { 2840 PetscInt bj = bB->j[k] / bs; 2841 PetscInt br = bB->j[k] % bs; 2842 bB->j[k] = garray[bj] * bs + br; 2843 } 2844 } 2845 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2846 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2847 2848 if (reuse == MAT_INPLACE_MATRIX) { 2849 PetscCall(MatHeaderReplace(A, &B)); 2850 } else { 2851 *newmat = B; 2852 } 2853 PetscFunctionReturn(PETSC_SUCCESS); 2854 } 2855 2856 /*MC 2857 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2858 2859 Options Database Keys: 2860 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2861 . 
-mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
   `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/*
  Constructor for MATMPIBAIJ: allocates and zero-initializes the Mat_MPIBAIJ
  implementation struct, installs the operations table, registers the
  type-specific methods (conversions, preallocation, hash-table factor, ...),
  and processes the -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register the type-specific implementations looked up by the generic dispatchers */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1.0 would under-size the table; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or `NULL`.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation (a no-op for non-MPIBAIJ types) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
       This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* on one process MATBAIJ degenerates to MATSEQBAIJ; otherwise use the parallel type */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Duplicates a MATMPIBAIJ matrix: copies the layout/bookkeeping fields from the
  old matrix, duplicates the colmap/garray lookup structures, the local work
  vector and scatter, and finally duplicates the two sequential pieces
  (copying values or not according to cpvalues).
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash-based insertion mode; the duplicate just gets set up the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size         = oldmat->size;
    a->rank         = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: the table itself is not copied, only the configuration */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage by counting distinct nonzero blocks per block row */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is counted */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both are attempted since this routine serves both types; presumably each call is a
       no-op for the other matrix type (cf. the PetscTryMethod dispatch) -- see wrappers */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad implementation for MATMPIBAIJ: only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (a no-op for types without a hash table) */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MATMPIBAIJ implementation: simply records the factor; the table is built later during assembly */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns the diagonal (Ad) and off-diagonal (Ao) sequential pieces of a MATMPIBAIJ
  matrix plus the mapping (colmap/garray) from Ao's local block columns to global
  block columns. The returned objects are internal references, not copies.
*/
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

// PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

   Collective

   Input Parameters:
+  matin - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.
. v     - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* reusable bs x bs scratch block, lazily allocated below */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* allocate the bs2-sized scratch block on first use and cache it on the matrix */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stepval = distance (in scalars) between consecutive rows (row-oriented) or
     columns (column-oriented) of one block inside the input array v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are ignored by convention */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather one bs x bs block into barray */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal portion */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) { /* negative block-column indices are ignored */
          continue;
        } else { /* off-diagonal portion */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* colmap maps a global block column to a 1-based non-blocked column
               offset in B (debug build verifies it is bs-aligned) */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* column not present in the current off-diagonal structure and new
               nonzeros are allowed: disassemble and insert with global indices */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* block row owned by another process: stash for communication at assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
  CSR format the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
  with column-major ordering within blocks.

  The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* input blocks are column-major (see Notes above), so insert with row orientation off */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Concatenate each process's SeqBAIJ matrix (inmat) into one distributed BAIJ
   matrix (*outmat); n is the local column size of the result or PETSC_DECIDE.
   With MAT_INITIAL_MATRIX the result is created and preallocated first. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall ==
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3614 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3615 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3616 PetscInt *bindx, rmax = a->rmax, j; 3617 PetscMPIInt rank, size; 3618 3619 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3620 mbs = m / bs; 3621 Nbs = N / cbs; 3622 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3623 nbs = n / cbs; 3624 3625 PetscCall(PetscMalloc1(rmax, &bindx)); 3626 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3627 3628 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3629 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3630 if (rank == size - 1) { 3631 /* Check sum(nbs) = Nbs */ 3632 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3633 } 3634 3635 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3636 for (i = 0; i < mbs; i++) { 3637 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3638 nnz = nnz / bs; 3639 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3640 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3641 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3642 } 3643 PetscCall(PetscFree(bindx)); 3644 3645 PetscCall(MatCreate(comm, outmat)); 3646 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3647 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3648 PetscCall(MatSetType(*outmat, MATBAIJ)); 3649 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3650 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3651 MatPreallocateEnd(dnz, onz); 3652 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3653 } 3654 3655 /* numeric phase */ 3656 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3657 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3658 3659 for (i = 0; i < m; i++) { 3660 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3661 Ii = i + rstart; 3662 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3663 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3664 } 3665 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3666 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3667 PetscFunctionReturn(PETSC_SUCCESS); 3668 } 3669