1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 #if defined(PETSC_USE_LOG) 13 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 14 #endif 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(MatStashDestroy_Private(&mat->bstash)); 17 PetscCall(MatDestroy(&baij->A)); 18 PetscCall(MatDestroy(&baij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&baij->colmap)); 21 #else 22 PetscCall(PetscFree(baij->colmap)); 23 #endif 24 PetscCall(PetscFree(baij->garray)); 25 PetscCall(VecDestroy(&baij->lvec)); 26 PetscCall(VecScatterDestroy(&baij->Mvctx)); 27 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 28 PetscCall(PetscFree(baij->barray)); 29 PetscCall(PetscFree2(baij->hd, baij->ht)); 30 PetscCall(PetscFree(baij->rangebs)); 31 PetscCall(PetscFree(mat->data)); 32 33 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 43 #if defined(PETSC_HAVE_HYPRE) 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 45 #endif 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 47 PetscFunctionReturn(PETSC_SUCCESS); 48 } 49 50 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 51 #define TYPE BAIJ 52 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 53 #undef TYPE 54 55 #if defined(PETSC_HAVE_HYPRE) 56 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 57 #endif 58 59 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 60 { 61 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 62 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 63 PetscScalar *va, *vv; 64 Vec vB, vA; 65 const PetscScalar *vb; 66 67 PetscFunctionBegin; 68 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 69 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 70 71 PetscCall(VecGetArrayWrite(vA, &va)); 72 if (idx) { 73 for (i = 0; i < m; i++) { 74 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 75 } 76 } 77 78 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 79 PetscCall(PetscMalloc1(m, &idxb)); 80 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 81 82 PetscCall(VecGetArrayWrite(v, &vv)); 83 PetscCall(VecGetArrayRead(vB, &vb)); 84 for (i = 0; i < m; i++) { 85 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 86 vv[i] = vb[i]; 87 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + 
(idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row /
bs; \ 188 rp = bj + bi[brow]; \ 189 ap = ba + bs2 * bi[brow]; \ 190 rmax = bimax[brow]; \ 191 nrow = bilen[brow]; \ 192 bcol = col / bs; \ 193 ridx = row % bs; \ 194 cidx = col % bs; \ 195 low = 0; \ 196 high = nrow; \ 197 while (high - low > 3) { \ 198 t = (low + high) / 2; \ 199 if (rp[t] > bcol) high = t; \ 200 else low = t; \ 201 } \ 202 for (_i = low; _i < high; _i++) { \ 203 if (rp[_i] > bcol) break; \ 204 if (rp[_i] == bcol) { \ 205 bap = ap + bs2 * _i + bs * cidx + ridx; \ 206 if (addv == ADD_VALUES) *bap += value; \ 207 else *bap = value; \ 208 goto b_noinsert; \ 209 } \ 210 } \ 211 if (b->nonew == 1) goto b_noinsert; \ 212 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 213 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 214 N = nrow++ - 1; \ 215 /* shift up all the later entries in this row */ \ 216 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 217 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 218 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 219 rp[_i] = bcol; \ 220 ap[bs2 * _i + bs * cidx + ridx] = value; \ 221 b_noinsert:; \ 222 bilen[brow] = nrow; \ 223 } 224 225 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 226 { 227 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 228 MatScalar value; 229 PetscBool roworiented = baij->roworiented; 230 PetscInt i, j, row, col; 231 PetscInt rstart_orig = mat->rmap->rstart; 232 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 233 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 234 235 /* Some Variables required in the macro */ 236 Mat A = baij->A; 237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 238 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 239 MatScalar *aa = a->a; 240 241 Mat B = baij->B; 242 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 243 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 244 MatScalar *ba = b->a; 245 246 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 247 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 248 MatScalar *ap, *bap; 249 250 PetscFunctionBegin; 251 for (i = 0; i < m; i++) { 252 if (im[i] < 0) continue; 253 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 254 if (im[i] >= rstart_orig && im[i] < rend_orig) { 255 row = im[i] - rstart_orig; 256 for (j = 0; j < n; j++) { 257 if (in[j] >= cstart_orig && in[j] < cend_orig) { 258 col = in[j] - cstart_orig; 259 if (roworiented) value = v[i * n + j]; 260 else value = v[i + j * m]; 261 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 262 } else if (in[j] < 0) { 263 continue; 264 } else { 265 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 266 if (mat->was_assembled) { 267 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 268 #if defined(PETSC_USE_CTABLE) 269 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 270 col = col - 1; 271 #else 272 col = baij->colmap[in[j] / bs] - 1; 273 #endif 274 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 275 
PetscCall(MatDisAssemble_MPIBAIJ(mat)); 276 col = in[j]; 277 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 278 B = baij->B; 279 b = (Mat_SeqBAIJ *)(B)->data; 280 bimax = b->imax; 281 bi = b->i; 282 bilen = b->ilen; 283 bj = b->j; 284 ba = b->a; 285 } else { 286 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 287 col += in[j] % bs; 288 } 289 } else col = in[j]; 290 if (roworiented) value = v[i * n + j]; 291 else value = v[i + j * m]; 292 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 293 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 294 } 295 } 296 } else { 297 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 298 if (!baij->donotstash) { 299 mat->assembled = PETSC_FALSE; 300 if (roworiented) { 301 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 302 } else { 303 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 304 } 305 } 306 } 307 } 308 PetscFunctionReturn(PETSC_SUCCESS); 309 } 310 311 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 314 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 315 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 316 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 317 PetscBool roworiented = a->roworiented; 318 const PetscScalar *value = v; 319 MatScalar *ap, *aa = a->a, *bap; 320 321 PetscFunctionBegin; 322 rp = aj + ai[row]; 323 ap = aa + bs2 * ai[row]; 324 rmax = imax[row]; 325 nrow = ailen[row]; 326 value = v; 327 low = 0; 328 high = nrow; 329 while (high - low > 7) { 330 t = (low + high) / 2; 331 if (rp[t] > col) high = t; 332 else low = t; 333 } 334 for (i = low; i < high; i++) { 335 if (rp[i] > col) break; 336 if (rp[i] == col) { 337 bap = ap + bs2 * i; 338 if (roworiented) { 339 if (is == ADD_VALUES) { 340 for (ii = 0; ii < bs; ii++) { 341 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 342 } 343 } else { 344 for (ii = 0; ii < bs; ii++) { 345 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 346 } 347 } 348 } else { 349 if (is == ADD_VALUES) { 350 for (ii = 0; ii < bs; ii++, value += bs) { 351 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 352 bap += bs; 353 } 354 } else { 355 for (ii = 0; ii < bs; ii++, value += bs) { 356 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 357 bap += bs; 358 } 359 } 360 } 361 goto noinsert2; 362 } 363 } 364 if (nonew == 1) goto noinsert2; 365 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 366 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 367 N = nrow++ - 1; 368 high++; 369 /* shift up all the later entries in this row */ 370 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 371 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 372 rp[i] = col; 373 bap = ap + bs2 * i; 374 if (roworiented) { 375 for (ii = 0; ii < bs; ii++) { 376 for (jj = ii; jj < bs2; jj += bs) bap[jj] 
= *value++; 377 } 378 } else { 379 for (ii = 0; ii < bs; ii++) { 380 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 381 } 382 } 383 noinsert2:; 384 ailen[row] = nrow; 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } 387 388 /* 389 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 390 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 391 */ 392 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 393 { 394 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 395 const PetscScalar *value; 396 MatScalar *barray = baij->barray; 397 PetscBool roworiented = baij->roworiented; 398 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 399 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 400 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 401 402 PetscFunctionBegin; 403 if (!barray) { 404 PetscCall(PetscMalloc1(bs2, &barray)); 405 baij->barray = barray; 406 } 407 408 if (roworiented) stepval = (n - 1) * bs; 409 else stepval = (m - 1) * bs; 410 411 for (i = 0; i < m; i++) { 412 if (im[i] < 0) continue; 413 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 414 if (im[i] >= rstart && im[i] < rend) { 415 row = im[i] - rstart; 416 for (j = 0; j < n; j++) { 417 /* If NumCol = 1 then a copy is not required */ 418 if ((roworiented) && (n == 1)) { 419 barray = (MatScalar *)v + i * bs2; 420 } else if ((!roworiented) && (m == 1)) { 421 barray = (MatScalar *)v + j * bs2; 422 } else { /* Here a copy is required */ 423 if (roworiented) { 424 value = v + (i * (stepval + bs) + j) * bs; 425 } else { 426 value = v + (j * (stepval + bs) + i) * bs; 427 } 428 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 429 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 430 barray += bs; 431 } 432 barray -= bs2; 433 } 434 435 if (in[j] >= cstart && in[j] < cend) { 436 col = in[j] - cstart; 437 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 438 } else if (in[j] < 0) { 439 continue; 440 } else { 441 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 442 if (mat->was_assembled) { 443 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 444 445 #if defined(PETSC_USE_DEBUG) 446 #if defined(PETSC_USE_CTABLE) 447 { 448 PetscInt data; 449 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 450 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 451 } 452 #else 453 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 454 #endif 455 #endif 456 #if defined(PETSC_USE_CTABLE) 457 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 458 col = (col - 1) / bs; 459 #else 460 col = (baij->colmap[in[j]] - 1) / bs; 461 #endif 462 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 463 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 464 col = in[j]; 465 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 466 } else col = in[j]; 467 
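        /* At this point col addresses the off-diagonal block matrix B: it is the local block column obtained
           from colmap when the matrix was previously assembled, or the global block column when B has just
           been disassembled above (or was never assembled), in which case B still stores global block column
           indices. */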
PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 468 } 469 } 470 } else { 471 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 472 if (!baij->donotstash) { 473 if (roworiented) { 474 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 475 } else { 476 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 477 } 478 } 479 } 480 } 481 PetscFunctionReturn(PETSC_SUCCESS); 482 } 483 484 #define HASH_KEY 0.6180339887 485 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 486 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 487 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 488 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 489 { 490 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 491 PetscBool roworiented = baij->roworiented; 492 PetscInt i, j, row, col; 493 PetscInt rstart_orig = mat->rmap->rstart; 494 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 495 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 496 PetscReal tmp; 497 MatScalar **HD = baij->hd, value; 498 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 499 500 PetscFunctionBegin; 501 for (i = 0; i < m; i++) { 502 if (PetscDefined(USE_DEBUG)) { 503 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 504 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 505 } 506 row = im[i]; 507 if (row >= rstart_orig && row < rend_orig) { 508 for (j = 0; j < n; j++) { 509 col = in[j]; 510 if (roworiented) value = v[i * n + j]; 511 else value = v[i + j * m]; 512 /* Look up PetscInto the Hash Table */ 513 key = (row / bs) * Nbs + (col / bs) + 1; 514 h1 = HASH(size, key, tmp); 515 516 idx = h1; 517 if (PetscDefined(USE_DEBUG)) { 518 insert_ct++; 519 total_ct++; 520 if (HT[idx] != key) { 521 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 522 ; 523 if (idx == size) { 524 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 525 ; 526 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 527 } 528 } 529 } else if (HT[idx] != key) { 530 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 531 ; 532 if (idx == size) { 533 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 534 ; 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) 
{ 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 595 ; 596 if (idx == size) { 597 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 598 ; 599 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 600 } 601 } 602 } else if (HT[idx] != key) { 603 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 604 ; 605 if (idx == size) { 606 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 607 ; 608 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 609 } 610 } 611 baij_a = HD[idx]; 612 if (roworiented) { 613 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 614 /* value = v + (i*(stepval+bs)+j)*bs; */ 615 value = v_t; 616 v_t += bs; 617 if (addv == ADD_VALUES) { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 620 } 621 } else { 622 for (ii = 0; ii < bs; ii++, value += stepval) { 623 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 624 } 625 } 626 } else { 627 value = v + j * (stepval + bs) * bs + i * bs; 628 if (addv == ADD_VALUES) { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 631 } 632 } else { 633 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 634 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 635 } 636 } 637 } 638 } 639 } else { 640 if (!baij->donotstash) { 641 if (roworiented) { 642 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 643 } else { 644 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 645 } 646 } 647 } 648 } 649 if (PetscDefined(USE_DEBUG)) { 650 baij->ht_total_ct += total_ct; 651 baij->ht_insert_ct += insert_ct; 652 } 653 
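  /* Note that both hash-table insertion paths above can only update blocks that already exist in the table
     built by MatCreateHashTable_MPIBAIJ_Private(); a lookup that fails is an error, so no new nonzero blocks
     can be introduced once the hash table is in use. */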
PetscFunctionReturn(PETSC_SUCCESS); 654 } 655 656 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 657 { 658 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 659 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 660 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 661 662 PetscFunctionBegin; 663 for (i = 0; i < m; i++) { 664 if (idxm[i] < 0) continue; /* negative row */ 665 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 666 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 667 row = idxm[i] - bsrstart; 668 for (j = 0; j < n; j++) { 669 if (idxn[j] < 0) continue; /* negative column */ 670 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 671 if (idxn[j] >= bscstart && idxn[j] < bscend) { 672 col = idxn[j] - bscstart; 673 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 674 } else { 675 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 676 #if defined(PETSC_USE_CTABLE) 677 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 678 data--; 679 #else 680 data = baij->colmap[idxn[j] / bs] - 1; 681 #endif 682 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 683 else { 684 col = data + idxn[j] % bs; 685 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 686 } 687 } 688 } 689 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 690 } 691 PetscFunctionReturn(PETSC_SUCCESS); 692 } 693 694 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 695 { 696 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 697 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 698 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 699 PetscReal sum = 0.0; 700 MatScalar *v; 701 702 PetscFunctionBegin; 703 if (baij->size == 1) { 704 PetscCall(MatNorm(baij->A, type, nrm)); 705 } else { 706 if (type == NORM_FROBENIUS) { 707 v = amat->a; 708 nz = amat->nz * bs2; 709 for (i = 0; i < nz; i++) { 710 sum += PetscRealPart(PetscConj(*v) * (*v)); 711 v++; 712 } 713 v = bmat->a; 714 nz = bmat->nz * bs2; 715 for (i = 0; i < nz; i++) { 716 sum += PetscRealPart(PetscConj(*v) * (*v)); 717 v++; 718 } 719 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 720 *nrm = PetscSqrtReal(*nrm); 721 } else if (type == NORM_1) { /* max column sum */ 722 PetscReal *tmp, *tmp2; 723 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 724 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 725 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 726 v = amat->a; 727 jj = amat->j; 728 for (i = 0; i < amat->nz; i++) { 729 for (j = 0; j < bs; j++) { 730 col = bs * (cstart + *jj) + j; /* column index */ 731 for (row = 0; row < bs; row++) { 732 tmp[col] += PetscAbsScalar(*v); 733 v++; 734 } 735 } 736 jj++; 737 } 738 v = bmat->a; 739 jj = bmat->j; 740 for (i = 0; i < bmat->nz; i++) { 741 for (j = 0; j < bs; j++) { 742 col = bs * garray[*jj] + j; 743 for (row = 0; row < bs; row++) { 744 tmp[col] += PetscAbsScalar(*v); 745 v++; 746 } 747 } 748 jj++; 749 } 750 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, 
MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 751 *nrm = 0.0; 752 for (j = 0; j < mat->cmap->N; j++) { 753 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 754 } 755 PetscCall(PetscFree(tmp)); 756 PetscCall(PetscFree(tmp2)); 757 } else if (type == NORM_INFINITY) { /* max row sum */ 758 PetscReal *sums; 759 PetscCall(PetscMalloc1(bs, &sums)); 760 sum = 0.0; 761 for (j = 0; j < amat->mbs; j++) { 762 for (row = 0; row < bs; row++) sums[row] = 0.0; 763 v = amat->a + bs2 * amat->i[j]; 764 nz = amat->i[j + 1] - amat->i[j]; 765 for (i = 0; i < nz; i++) { 766 for (col = 0; col < bs; col++) { 767 for (row = 0; row < bs; row++) { 768 sums[row] += PetscAbsScalar(*v); 769 v++; 770 } 771 } 772 } 773 v = bmat->a + bs2 * bmat->i[j]; 774 nz = bmat->i[j + 1] - bmat->i[j]; 775 for (i = 0; i < nz; i++) { 776 for (col = 0; col < bs; col++) { 777 for (row = 0; row < bs; row++) { 778 sums[row] += PetscAbsScalar(*v); 779 v++; 780 } 781 } 782 } 783 for (row = 0; row < bs; row++) { 784 if (sums[row] > sum) sum = sums[row]; 785 } 786 } 787 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 788 PetscCall(PetscFree(sums)); 789 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 790 } 791 PetscFunctionReturn(PETSC_SUCCESS); 792 } 793 794 /* 795 Creates the hash table, and sets the table 796 This table is created only once. 797 If new entried need to be added to the matrix 798 then the hash table has to be destroyed and 799 recreated. 800 */ 801 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 802 { 803 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 804 Mat A = baij->A, B = baij->B; 805 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 806 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 807 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 808 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 809 PetscInt *HT, key; 810 MatScalar **HD; 811 PetscReal tmp; 812 #if defined(PETSC_USE_INFO) 813 PetscInt ct = 0, max = 0; 814 #endif 815 816 PetscFunctionBegin; 817 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 818 819 baij->ht_size = (PetscInt)(factor * nz); 820 ht_size = baij->ht_size; 821 822 /* Allocate Memory for Hash Table */ 823 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 824 HD = baij->hd; 825 HT = baij->ht; 826 827 /* Loop Over A */ 828 for (i = 0; i < a->mbs; i++) { 829 for (j = ai[i]; j < ai[i + 1]; j++) { 830 row = i + rstart; 831 col = aj[j] + cstart; 832 833 key = row * Nbs + col + 1; 834 h1 = HASH(ht_size, key, tmp); 835 for (k = 0; k < ht_size; k++) { 836 if (!HT[(h1 + k) % ht_size]) { 837 HT[(h1 + k) % ht_size] = key; 838 HD[(h1 + k) % ht_size] = a->a + j * bs2; 839 break; 840 #if defined(PETSC_USE_INFO) 841 } else { 842 ct++; 843 #endif 844 } 845 } 846 #if defined(PETSC_USE_INFO) 847 if (k > max) max = k; 848 #endif 849 } 850 } 851 /* Loop Over B */ 852 for (i = 0; i < b->mbs; i++) { 853 for (j = bi[i]; j < bi[i + 1]; j++) { 854 row = i + rstart; 855 col = garray[bj[j]]; 856 key = row * Nbs + col + 1; 857 h1 = HASH(ht_size, key, tmp); 858 for (k = 0; k < ht_size; k++) { 859 if (!HT[(h1 + k) % ht_size]) { 860 HT[(h1 + k) % ht_size] = key; 861 HD[(h1 + k) % ht_size] = b->a + j * bs2; 862 break; 863 #if defined(PETSC_USE_INFO) 864 } else { 865 ct++; 866 #endif 867 } 868 } 869 #if defined(PETSC_USE_INFO) 870 if (k > max) max = k; 871 #endif 872 } 873 } 874 875 /* Print 
Summary */ 876 #if defined(PETSC_USE_INFO) 877 for (i = 0, j = 0; i < ht_size; i++) { 878 if (HT[i]) j++; 879 } 880 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 881 #endif 882 PetscFunctionReturn(PETSC_SUCCESS); 883 } 884 885 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 886 { 887 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 888 PetscInt nstash, reallocs; 889 890 PetscFunctionBegin; 891 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 892 893 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 894 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 895 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 896 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 897 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 898 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 899 PetscFunctionReturn(PETSC_SUCCESS); 900 } 901 902 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 903 { 904 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 905 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 906 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 907 PetscInt *row, *col; 908 PetscBool r1, r2, r3, other_disassembled; 909 MatScalar *val; 910 PetscMPIInt n; 911 912 PetscFunctionBegin; 913 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 914 if (!baij->donotstash && !mat->nooffprocentries) { 915 while (1) { 916 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 917 if (!flg) break; 918 919 for (i = 0; i < n;) { 920 /* Now identify the consecutive vals belonging to the same row */ 921 for (j = i, rstart = row[j]; j < n; j++) { 922 if (row[j] != rstart) break; 923 } 924 if (j < n) ncols = j - i; 925 else ncols = n - i; 926 /* Now assemble all these values with a single function call */ 927 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 928 i = j; 929 } 930 } 931 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 932 /* Now process the block-stash. 
Since the values are stashed column-oriented, 933 set the roworiented flag to column oriented, and after MatSetValues() 934 restore the original flags */ 935 r1 = baij->roworiented; 936 r2 = a->roworiented; 937 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 938 939 baij->roworiented = PETSC_FALSE; 940 a->roworiented = PETSC_FALSE; 941 942 (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 943 while (1) { 944 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 945 if (!flg) break; 946 947 for (i = 0; i < n;) { 948 /* Now identify the consecutive vals belonging to the same row */ 949 for (j = i, rstart = row[j]; j < n; j++) { 950 if (row[j] != rstart) break; 951 } 952 if (j < n) ncols = j - i; 953 else ncols = n - i; 954 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 955 i = j; 956 } 957 } 958 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 959 960 baij->roworiented = r1; 961 a->roworiented = r2; 962 963 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */ 964 } 965 966 PetscCall(MatAssemblyBegin(baij->A, mode)); 967 PetscCall(MatAssemblyEnd(baij->A, mode)); 968 969 /* determine if any processor has disassembled, if so we must 970 also disassemble ourselves, in order that we may reassemble. */ 971 /* 972 if nonzero structure of submatrix B cannot change then we know that 973 no processor disassembled thus we can skip this stuff 974 */ 975 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 976 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 977 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 978 } 979 980 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 981 PetscCall(MatAssemblyBegin(baij->B, mode)); 982 PetscCall(MatAssemblyEnd(baij->B, mode)); 983 984 #if defined(PETSC_USE_INFO) 985 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 986 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 987 988 baij->ht_total_ct = 0; 989 baij->ht_insert_ct = 0; 990 } 991 #endif 992 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 993 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 994 995 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 996 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 997 } 998 999 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 1000 1001 baij->rowvalues = NULL; 1002 1003 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1004 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 1005 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1006 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1007 } 1008 PetscFunctionReturn(PETSC_SUCCESS); 1009 } 1010 1011 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1012 #include <petscdraw.h> 1013 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1014 { 1015 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1016 PetscMPIInt rank = baij->rank; 1017 PetscInt bs = mat->rmap->bs; 1018 PetscBool iascii, isdraw; 1019 PetscViewer sviewer; 1020 PetscViewerFormat format; 1021 1022 
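  /* For the ASCII info formats only per-process summaries are printed below; in all other cases the entire
     matrix is gathered onto one process and viewed there, so this path is only practical for small matrices.
     It is reached, for example, through MatView(mat, PETSC_VIEWER_STDOUT_WORLD) or the -mat_view option. */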
PetscFunctionBegin; 1023 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1024 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1025 if (iascii) { 1026 PetscCall(PetscViewerGetFormat(viewer, &format)); 1027 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1028 MatInfo info; 1029 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1030 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1031 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1033 mat->rmap->bs, (double)info.memory)); 1034 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1035 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1036 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1037 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1038 PetscCall(PetscViewerFlush(viewer)); 1039 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1040 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1041 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1044 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1047 PetscFunctionReturn(PETSC_SUCCESS); 1048 } 1049 } 1050 1051 if (isdraw) { 1052 PetscDraw draw; 1053 PetscBool isnull; 1054 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1055 PetscCall(PetscDrawIsNull(draw, &isnull)); 1056 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1057 } 1058 1059 { 1060 /* assemble the entire matrix onto first processor. */ 1061 Mat A; 1062 Mat_SeqBAIJ *Aloc; 1063 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1064 MatScalar *a; 1065 const char *matname; 1066 1067 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1068 /* Perhaps this should be the type of mat? 
*/ 1069 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1070 if (rank == 0) { 1071 PetscCall(MatSetSizes(A, M, N, M, N)); 1072 } else { 1073 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1074 } 1075 PetscCall(MatSetType(A, MATMPIBAIJ)); 1076 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1077 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1078 1079 /* copy over the A part */ 1080 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1081 ai = Aloc->i; 1082 aj = Aloc->j; 1083 a = Aloc->a; 1084 PetscCall(PetscMalloc1(bs, &rvals)); 1085 1086 for (i = 0; i < mbs; i++) { 1087 rvals[0] = bs * (baij->rstartbs + i); 1088 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1089 for (j = ai[i]; j < ai[i + 1]; j++) { 1090 col = (baij->cstartbs + aj[j]) * bs; 1091 for (k = 0; k < bs; k++) { 1092 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1093 col++; 1094 a += bs; 1095 } 1096 } 1097 } 1098 /* copy over the B part */ 1099 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1100 ai = Aloc->i; 1101 aj = Aloc->j; 1102 a = Aloc->a; 1103 for (i = 0; i < mbs; i++) { 1104 rvals[0] = bs * (baij->rstartbs + i); 1105 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1106 for (j = ai[i]; j < ai[i + 1]; j++) { 1107 col = baij->garray[aj[j]] * bs; 1108 for (k = 0; k < bs; k++) { 1109 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1110 col++; 1111 a += bs; 1112 } 1113 } 1114 } 1115 PetscCall(PetscFree(rvals)); 1116 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1117 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1118 /* 1119 Everyone has to call to draw the matrix since the graphics waits are 1120 synchronized across all processors that share the PetscDraw object 1121 */ 1122 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1123 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1124 if (rank == 0) { 1125 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname)); 1126 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer)); 1127 } 1128 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1129 PetscCall(PetscViewerFlush(viewer)); 1130 PetscCall(MatDestroy(&A)); 1131 } 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1136 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1137 { 1138 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1139 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1140 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1141 const PetscInt *garray = aij->garray; 1142 PetscInt header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l; 1143 PetscInt *rowlens, *colidxs; 1144 PetscScalar *matvals; 1145 1146 PetscFunctionBegin; 1147 PetscCall(PetscViewerSetUp(viewer)); 1148 1149 M = mat->rmap->N; 1150 N = mat->cmap->N; 1151 m = mat->rmap->n; 1152 rs = mat->rmap->rstart; 1153 cs = mat->cmap->rstart; 1154 bs = mat->rmap->bs; 1155 nz = bs * bs * (A->nz + B->nz); 1156 1157 /* write matrix header */ 1158 header[0] = MAT_FILE_CLASSID; 1159 header[1] = M; 1160 header[2] = N; 1161 header[3] = nz; 1162 PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1163 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1164 1165 /* fill in and store row lengths */ 1166 PetscCall(PetscMalloc1(m, &rowlens)); 1167 for (cnt = 0, i = 0; i < A->mbs; i++) 1168 
for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1169 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1170 PetscCall(PetscFree(rowlens)); 1171 1172 /* fill in and store column indices */ 1173 PetscCall(PetscMalloc1(nz, &colidxs)); 1174 for (cnt = 0, i = 0; i < A->mbs; i++) { 1175 for (k = 0; k < bs; k++) { 1176 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1177 if (garray[B->j[jb]] > cs / bs) break; 1178 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1179 } 1180 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1181 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1182 for (; jb < B->i[i + 1]; jb++) 1183 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1184 } 1185 } 1186 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1187 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1188 PetscCall(PetscFree(colidxs)); 1189 1190 /* fill in and store nonzero values */ 1191 PetscCall(PetscMalloc1(nz, &matvals)); 1192 for (cnt = 0, i = 0; i < A->mbs; i++) { 1193 for (k = 0; k < bs; k++) { 1194 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1195 if (garray[B->j[jb]] > cs / bs) break; 1196 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1197 } 1198 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1199 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1200 for (; jb < B->i[i + 1]; jb++) 1201 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1202 } 1203 } 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1205 PetscCall(PetscFree(matvals)); 1206 1207 /* write block size option to the viewer's .info file */ 1208 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1209 PetscFunctionReturn(PETSC_SUCCESS); 1210 } 1211 1212 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1213 { 1214 PetscBool iascii, isdraw, issocket, isbinary; 1215 1216 PetscFunctionBegin; 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1219 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1220 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1221 if (iascii || isdraw || issocket) { 1222 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1223 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1224 PetscFunctionReturn(PETSC_SUCCESS); 1225 } 1226 1227 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1228 { 1229 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1230 PetscInt nt; 1231 1232 PetscFunctionBegin; 1233 PetscCall(VecGetLocalSize(xx, &nt)); 1234 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1235 PetscCall(VecGetLocalSize(yy, &nt)); 1236 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1237 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1238 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1239 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1240 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 
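/*
   MatMultAdd_MPIBAIJ() below uses the same communication/computation overlap as MatMult_MPIBAIJ() above:
   the scatter that gathers the needed off-process entries of xx into a->lvec is started, the diagonal-block
   product is computed while those messages are in flight, and only then is the scatter completed and the
   off-diagonal contribution accumulated. Schematically (a rough sketch in terms of the public API; the code
   below calls the type-specific kernels through the ops table instead):

     VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD); // start the ghost-value update
     MatMultAdd(a->A, xx, yy, zz);                                           // zz = yy + A_diag * xx, overlaps communication
     VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD);   // finish the ghost-value update
     MatMultAdd(a->B, a->lvec, zz, zz);                                      // zz += B_offdiag * lvec
*/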
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1245 { 1246 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1247 1248 PetscFunctionBegin; 1249 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1250 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1251 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1252 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1253 PetscFunctionReturn(PETSC_SUCCESS); 1254 } 1255 1256 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1257 { 1258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1259 1260 PetscFunctionBegin; 1261 /* do nondiagonal part */ 1262 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1263 /* do local part */ 1264 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1265 /* add partial results together */ 1266 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1267 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1268 PetscFunctionReturn(PETSC_SUCCESS); 1269 } 1270 1271 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1274 1275 PetscFunctionBegin; 1276 /* do nondiagonal part */ 1277 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1278 /* do local part */ 1279 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1280 /* add partial results together */ 1281 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1282 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1283 PetscFunctionReturn(PETSC_SUCCESS); 1284 } 1285 1286 /* 1287 This only works correctly for square matrices where the subblock A->A is the 1288 diagonal block 1289 */ 1290 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1291 { 1292 PetscFunctionBegin; 1293 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1294 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1295 PetscFunctionReturn(PETSC_SUCCESS); 1296 } 1297 1298 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1299 { 1300 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1301 1302 PetscFunctionBegin; 1303 PetscCall(MatScale(a->A, aa)); 1304 PetscCall(MatScale(a->B, aa)); 1305 PetscFunctionReturn(PETSC_SUCCESS); 1306 } 1307 1308 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1309 { 1310 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1311 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1312 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1313 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1314 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1315 1316 PetscFunctionBegin; 1317 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1318 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1319 mat->getrowactive = PETSC_TRUE; 1320 1321 if (!mat->rowvalues && (idx || v)) { 1322 /* 1323 allocate enough space to hold information from the longest row. 
1324 */ 1325 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1326 PetscInt max = 1, mbs = mat->mbs, tmp; 1327 for (i = 0; i < mbs; i++) { 1328 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1329 if (max < tmp) max = tmp; 1330 } 1331 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1332 } 1333 lrow = row - brstart; 1334 1335 pvA = &vworkA; 1336 pcA = &cworkA; 1337 pvB = &vworkB; 1338 pcB = &cworkB; 1339 if (!v) { 1340 pvA = NULL; 1341 pvB = NULL; 1342 } 1343 if (!idx) { 1344 pcA = NULL; 1345 if (!v) pcB = NULL; 1346 } 1347 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1348 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1349 nztot = nzA + nzB; 1350 1351 cmap = mat->garray; 1352 if (v || idx) { 1353 if (nztot) { 1354 /* Sort by increasing column numbers, assuming A and B already sorted */ 1355 PetscInt imark = -1; 1356 if (v) { 1357 *v = v_p = mat->rowvalues; 1358 for (i = 0; i < nzB; i++) { 1359 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1360 else break; 1361 } 1362 imark = i; 1363 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1364 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1365 } 1366 if (idx) { 1367 *idx = idx_p = mat->rowindices; 1368 if (imark > -1) { 1369 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1370 } else { 1371 for (i = 0; i < nzB; i++) { 1372 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1373 else break; 1374 } 1375 imark = i; 1376 } 1377 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1378 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1379 } 1380 } else { 1381 if (idx) *idx = NULL; 1382 if (v) *v = NULL; 1383 } 1384 } 1385 *nz = nztot; 1386 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1387 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1388 PetscFunctionReturn(PETSC_SUCCESS); 1389 } 1390 1391 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1392 { 1393 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1394 1395 PetscFunctionBegin; 1396 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1397 baij->getrowactive = PETSC_FALSE; 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1402 { 1403 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1404 1405 PetscFunctionBegin; 1406 PetscCall(MatZeroEntries(l->A)); 1407 PetscCall(MatZeroEntries(l->B)); 1408 PetscFunctionReturn(PETSC_SUCCESS); 1409 } 1410 1411 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1414 Mat A = a->A, B = a->B; 1415 PetscLogDouble isend[5], irecv[5]; 1416 1417 PetscFunctionBegin; 1418 info->block_size = (PetscReal)matin->rmap->bs; 1419 1420 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1421 1422 isend[0] = info->nz_used; 1423 isend[1] = info->nz_allocated; 1424 isend[2] = info->nz_unneeded; 1425 isend[3] = info->memory; 1426 isend[4] = info->mallocs; 1427 1428 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1429 1430 isend[0] += info->nz_used; 1431 isend[1] += info->nz_allocated; 1432 isend[2] += info->nz_unneeded; 1433 isend[3] += info->memory; 1434 isend[4] += info->mallocs; 1435 1436 if (flag == MAT_LOCAL) { 1437 info->nz_used = isend[0]; 1438 
info->nz_allocated = isend[1]; 1439 info->nz_unneeded = isend[2]; 1440 info->memory = isend[3]; 1441 info->mallocs = isend[4]; 1442 } else if (flag == MAT_GLOBAL_MAX) { 1443 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1444 1445 info->nz_used = irecv[0]; 1446 info->nz_allocated = irecv[1]; 1447 info->nz_unneeded = irecv[2]; 1448 info->memory = irecv[3]; 1449 info->mallocs = irecv[4]; 1450 } else if (flag == MAT_GLOBAL_SUM) { 1451 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1452 1453 info->nz_used = irecv[0]; 1454 info->nz_allocated = irecv[1]; 1455 info->nz_unneeded = irecv[2]; 1456 info->memory = irecv[3]; 1457 info->mallocs = irecv[4]; 1458 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1459 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1460 info->fill_ratio_needed = 0; 1461 info->factor_mallocs = 0; 1462 PetscFunctionReturn(PETSC_SUCCESS); 1463 } 1464 1465 PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1466 { 1467 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1468 1469 PetscFunctionBegin; 1470 switch (op) { 1471 case MAT_NEW_NONZERO_LOCATIONS: 1472 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1473 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1474 case MAT_KEEP_NONZERO_PATTERN: 1475 case MAT_NEW_NONZERO_LOCATION_ERR: 1476 MatCheckPreallocated(A, 1); 1477 PetscCall(MatSetOption(a->A, op, flg)); 1478 PetscCall(MatSetOption(a->B, op, flg)); 1479 break; 1480 case MAT_ROW_ORIENTED: 1481 MatCheckPreallocated(A, 1); 1482 a->roworiented = flg; 1483 1484 PetscCall(MatSetOption(a->A, op, flg)); 1485 PetscCall(MatSetOption(a->B, op, flg)); 1486 break; 1487 case MAT_FORCE_DIAGONAL_ENTRIES: 1488 case MAT_SORTED_FULL: 1489 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1490 break; 1491 case MAT_IGNORE_OFF_PROC_ENTRIES: 1492 a->donotstash = flg; 1493 break; 1494 case MAT_USE_HASH_TABLE: 1495 a->ht_flag = flg; 1496 a->ht_fact = 1.39; 1497 break; 1498 case MAT_SYMMETRIC: 1499 case MAT_STRUCTURALLY_SYMMETRIC: 1500 case MAT_HERMITIAN: 1501 case MAT_SUBMAT_SINGLEIS: 1502 case MAT_SYMMETRY_ETERNAL: 1503 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1504 case MAT_SPD_ETERNAL: 1505 /* if the diagonal matrix is square it inherits some of the properties above */ 1506 break; 1507 default: 1508 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1509 } 1510 PetscFunctionReturn(PETSC_SUCCESS); 1511 } 1512 1513 PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1514 { 1515 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1516 Mat_SeqBAIJ *Aloc; 1517 Mat B; 1518 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1519 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1520 MatScalar *a; 1521 1522 PetscFunctionBegin; 1523 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1524 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1525 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1526 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1527 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1528 /* Do not know preallocation information, but must set block size */ 1529 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1530 } else { 1531 B = *matout; 1532 } 1533 1534 /* copy over the A part */ 1535 Aloc = 
(Mat_SeqBAIJ *)baij->A->data; 1536 ai = Aloc->i; 1537 aj = Aloc->j; 1538 a = Aloc->a; 1539 PetscCall(PetscMalloc1(bs, &rvals)); 1540 1541 for (i = 0; i < mbs; i++) { 1542 rvals[0] = bs * (baij->rstartbs + i); 1543 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1544 for (j = ai[i]; j < ai[i + 1]; j++) { 1545 col = (baij->cstartbs + aj[j]) * bs; 1546 for (k = 0; k < bs; k++) { 1547 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1548 1549 col++; 1550 a += bs; 1551 } 1552 } 1553 } 1554 /* copy over the B part */ 1555 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1556 ai = Aloc->i; 1557 aj = Aloc->j; 1558 a = Aloc->a; 1559 for (i = 0; i < mbs; i++) { 1560 rvals[0] = bs * (baij->rstartbs + i); 1561 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1562 for (j = ai[i]; j < ai[i + 1]; j++) { 1563 col = baij->garray[aj[j]] * bs; 1564 for (k = 0; k < bs; k++) { 1565 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1566 col++; 1567 a += bs; 1568 } 1569 } 1570 } 1571 PetscCall(PetscFree(rvals)); 1572 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1574 1575 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1576 else PetscCall(MatHeaderMerge(A, &B)); 1577 PetscFunctionReturn(PETSC_SUCCESS); 1578 } 1579 1580 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1581 { 1582 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1583 Mat a = baij->A, b = baij->B; 1584 PetscInt s1, s2, s3; 1585 1586 PetscFunctionBegin; 1587 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1588 if (rr) { 1589 PetscCall(VecGetLocalSize(rr, &s1)); 1590 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1591 /* Overlap communication with computation. */ 1592 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1593 } 1594 if (ll) { 1595 PetscCall(VecGetLocalSize(ll, &s1)); 1596 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1597 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1598 } 1599 /* scale the diagonal block */ 1600 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1601 1602 if (rr) { 1603 /* Do a scatter end and then right scale the off-diagonal block */ 1604 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1605 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1606 } 1607 PetscFunctionReturn(PETSC_SUCCESS); 1608 } 1609 1610 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1611 { 1612 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1613 PetscInt *lrows; 1614 PetscInt r, len; 1615 PetscBool cong; 1616 1617 PetscFunctionBegin; 1618 /* get locally owned rows */ 1619 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1620 /* fix right hand side if needed */ 1621 if (x && b) { 1622 const PetscScalar *xx; 1623 PetscScalar *bb; 1624 1625 PetscCall(VecGetArrayRead(x, &xx)); 1626 PetscCall(VecGetArray(b, &bb)); 1627 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1628 PetscCall(VecRestoreArrayRead(x, &xx)); 1629 PetscCall(VecRestoreArray(b, &bb)); 1630 } 1631 1632 /* actually zap the local rows */ 1633 /* 1634 Zero the required rows. 
If the "diagonal block" of the matrix 1635 is square and the user wishes to set the diagonal we use separate 1636 code so that MatSetValues() is not called for each diagonal allocating 1637 new memory, thus calling lots of mallocs and slowing things down. 1638 1639 */ 1640 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1641 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1642 PetscCall(MatHasCongruentLayouts(A, &cong)); 1643 if ((diag != 0.0) && cong) { 1644 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1645 } else if (diag != 0.0) { 1646 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1647 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1648 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1649 for (r = 0; r < len; ++r) { 1650 const PetscInt row = lrows[r] + A->rmap->rstart; 1651 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1652 } 1653 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1654 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1655 } else { 1656 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1657 } 1658 PetscCall(PetscFree(lrows)); 1659 1660 /* only change matrix nonzero state if pattern was allowed to be changed */ 1661 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1662 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1663 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1664 } 1665 PetscFunctionReturn(PETSC_SUCCESS); 1666 } 1667 1668 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1669 { 1670 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1671 PetscMPIInt n = A->rmap->n, p = 0; 1672 PetscInt i, j, k, r, len = 0, row, col, count; 1673 PetscInt *lrows, *owners = A->rmap->range; 1674 PetscSFNode *rrows; 1675 PetscSF sf; 1676 const PetscScalar *xx; 1677 PetscScalar *bb, *mask; 1678 Vec xmask, lmask; 1679 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1680 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1681 PetscScalar *aa; 1682 1683 PetscFunctionBegin; 1684 /* Create SF where leaves are input rows and roots are owned rows */ 1685 PetscCall(PetscMalloc1(n, &lrows)); 1686 for (r = 0; r < n; ++r) lrows[r] = -1; 1687 PetscCall(PetscMalloc1(N, &rrows)); 1688 for (r = 0; r < N; ++r) { 1689 const PetscInt idx = rows[r]; 1690 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1691 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1692 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1693 } 1694 rrows[r].rank = p; 1695 rrows[r].index = rows[r] - owners[p]; 1696 } 1697 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1698 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1699 /* Collect flags for rows to be zeroed */ 1700 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1701 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1702 PetscCall(PetscSFDestroy(&sf)); 1703 /* Compress and put in row numbers */ 1704 for (r = 0; r < n; ++r) 1705 if (lrows[r] >= 0) lrows[len++] = r; 
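/* At this point the star forest has mapped the caller-supplied global rows onto this process: the roots are the n locally owned rows, the leaves are the N requested rows, and the MPI_LOR reduction above flags every owned row that some process asked to zero (roots that received no contribution keep the initial -1). The loop above then compacts those flags into the len local row indices stored at the front of lrows. */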
1706 /* zero diagonal part of matrix */ 1707 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1708 /* handle off diagonal part of matrix */ 1709 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1710 PetscCall(VecDuplicate(l->lvec, &lmask)); 1711 PetscCall(VecGetArray(xmask, &bb)); 1712 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1713 PetscCall(VecRestoreArray(xmask, &bb)); 1714 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1716 PetscCall(VecDestroy(&xmask)); 1717 if (x) { 1718 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1719 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1720 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1721 PetscCall(VecGetArray(b, &bb)); 1722 } 1723 PetscCall(VecGetArray(lmask, &mask)); 1724 /* remove zeroed rows of off diagonal matrix */ 1725 for (i = 0; i < len; ++i) { 1726 row = lrows[i]; 1727 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1728 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 1729 for (k = 0; k < count; ++k) { 1730 aa[0] = 0.0; 1731 aa += bs; 1732 } 1733 } 1734 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1735 for (i = 0; i < l->B->rmap->N; ++i) { 1736 row = i / bs; 1737 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1738 for (k = 0; k < bs; ++k) { 1739 col = bs * baij->j[j] + k; 1740 if (PetscAbsScalar(mask[col])) { 1741 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 1742 if (x) bb[i] -= aa[0] * xx[col]; 1743 aa[0] = 0.0; 1744 } 1745 } 1746 } 1747 } 1748 if (x) { 1749 PetscCall(VecRestoreArray(b, &bb)); 1750 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1751 } 1752 PetscCall(VecRestoreArray(lmask, &mask)); 1753 PetscCall(VecDestroy(&lmask)); 1754 PetscCall(PetscFree(lrows)); 1755 1756 /* only change matrix nonzero state if pattern was allowed to be changed */ 1757 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1758 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1759 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1760 } 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1765 { 1766 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1767 1768 PetscFunctionBegin; 1769 PetscCall(MatSetUnfactored(a->A)); 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1774 1775 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1776 { 1777 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1778 Mat a, b, c, d; 1779 PetscBool flg; 1780 1781 PetscFunctionBegin; 1782 a = matA->A; 1783 b = matA->B; 1784 c = matB->A; 1785 d = matB->B; 1786 1787 PetscCall(MatEqual(a, c, &flg)); 1788 if (flg) PetscCall(MatEqual(b, d, &flg)); 1789 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1794 { 1795 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1796 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
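In that case we fall back on the generic MatCopy_Basic(), which transfers the values row by row; otherwise the diagonal (A) and off-diagonal (B) SeqBAIJ blocks are copied directly.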
*/ 1800 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1801 PetscCall(MatCopy_Basic(A, B, str)); 1802 } else { 1803 PetscCall(MatCopy(a->A, b->A, str)); 1804 PetscCall(MatCopy(a->B, b->B, str)); 1805 } 1806 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1807 PetscFunctionReturn(PETSC_SUCCESS); 1808 } 1809 1810 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1811 { 1812 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1813 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1814 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1815 1816 PetscFunctionBegin; 1817 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1818 PetscFunctionReturn(PETSC_SUCCESS); 1819 } 1820 1821 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1822 { 1823 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1824 PetscBLASInt bnz, one = 1; 1825 Mat_SeqBAIJ *x, *y; 1826 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1827 1828 PetscFunctionBegin; 1829 if (str == SAME_NONZERO_PATTERN) { 1830 PetscScalar alpha = a; 1831 x = (Mat_SeqBAIJ *)xx->A->data; 1832 y = (Mat_SeqBAIJ *)yy->A->data; 1833 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1834 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1835 x = (Mat_SeqBAIJ *)xx->B->data; 1836 y = (Mat_SeqBAIJ *)yy->B->data; 1837 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1838 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1839 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1840 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1841 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1842 } else { 1843 Mat B; 1844 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1845 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1846 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1847 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1848 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1849 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1850 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1851 PetscCall(MatSetType(B, MATMPIBAIJ)); 1852 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1853 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1854 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1855 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
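The block-row preallocation computed above (nnz_d from the merged diagonal blocks of Y and X, nnz_o from the merged off-diagonal blocks) at least keeps the generic kernel from having to allocate while it inserts values.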
*/ 1856 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1857 PetscCall(MatHeaderMerge(Y, &B)); 1858 PetscCall(PetscFree(nnz_d)); 1859 PetscCall(PetscFree(nnz_o)); 1860 } 1861 PetscFunctionReturn(PETSC_SUCCESS); 1862 } 1863 1864 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1865 { 1866 PetscFunctionBegin; 1867 if (PetscDefined(USE_COMPLEX)) { 1868 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1869 1870 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1871 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1872 } 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1877 { 1878 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1879 1880 PetscFunctionBegin; 1881 PetscCall(MatRealPart(a->A)); 1882 PetscCall(MatRealPart(a->B)); 1883 PetscFunctionReturn(PETSC_SUCCESS); 1884 } 1885 1886 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1887 { 1888 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1889 1890 PetscFunctionBegin; 1891 PetscCall(MatImaginaryPart(a->A)); 1892 PetscCall(MatImaginaryPart(a->B)); 1893 PetscFunctionReturn(PETSC_SUCCESS); 1894 } 1895 1896 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1897 { 1898 IS iscol_local; 1899 PetscInt csize; 1900 1901 PetscFunctionBegin; 1902 PetscCall(ISGetLocalSize(iscol, &csize)); 1903 if (call == MAT_REUSE_MATRIX) { 1904 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1905 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1906 } else { 1907 PetscCall(ISAllGather(iscol, &iscol_local)); 1908 } 1909 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1910 if (call == MAT_INITIAL_MATRIX) { 1911 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1912 PetscCall(ISDestroy(&iscol_local)); 1913 } 1914 PetscFunctionReturn(PETSC_SUCCESS); 1915 } 1916 1917 /* 1918 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1919 in local and then by concatenating the local matrices the end result. 1920 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1921 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1922 */ 1923 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 1924 { 1925 PetscMPIInt rank, size; 1926 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1927 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1928 Mat M, Mreuse; 1929 MatScalar *vwork, *aa; 1930 MPI_Comm comm; 1931 IS isrow_new, iscol_new; 1932 Mat_SeqBAIJ *aij; 1933 1934 PetscFunctionBegin; 1935 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1936 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1937 PetscCallMPI(MPI_Comm_size(comm, &size)); 1938 /* The compression and expansion should be avoided. 
It does not point 1939 out errors and it might change the indices, hence it is buggy */ 1940 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1941 PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1942 1943 if (call == MAT_REUSE_MATRIX) { 1944 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1945 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1946 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse)); 1947 } else { 1948 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse)); 1949 } 1950 PetscCall(ISDestroy(&isrow_new)); 1951 PetscCall(ISDestroy(&iscol_new)); 1952 /* 1953 m - number of local rows 1954 n - number of columns (same on all processors) 1955 rstart - first row in new global matrix generated 1956 */ 1957 PetscCall(MatGetBlockSize(mat, &bs)); 1958 PetscCall(MatGetSize(Mreuse, &m, &n)); 1959 m = m / bs; 1960 n = n / bs; 1961 1962 if (call == MAT_INITIAL_MATRIX) { 1963 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1964 ii = aij->i; 1965 jj = aij->j; 1966 1967 /* 1968 Determine the number of non-zeros in the diagonal and off-diagonal 1969 portions of the matrix in order to do correct preallocation 1970 */ 1971 1972 /* first get start and end of "diagonal" columns */ 1973 if (csize == PETSC_DECIDE) { 1974 PetscCall(ISGetSize(isrow, &mglobal)); 1975 if (mglobal == n * bs) { /* square matrix */ 1976 nlocal = m; 1977 } else { 1978 nlocal = n / size + ((n % size) > rank); 1979 } 1980 } else { 1981 nlocal = csize / bs; 1982 } 1983 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1984 rstart = rend - nlocal; 1985 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1986 1987 /* next, compute all the lengths */ 1988 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1989 for (i = 0; i < m; i++) { 1990 jend = ii[i + 1] - ii[i]; 1991 olen = 0; 1992 dlen = 0; 1993 for (j = 0; j < jend; j++) { 1994 if (*jj < rstart || *jj >= rend) olen++; 1995 else dlen++; 1996 jj++; 1997 } 1998 olens[i] = olen; 1999 dlens[i] = dlen; 2000 } 2001 PetscCall(MatCreate(comm, &M)); 2002 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2003 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 2004 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2005 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2006 PetscCall(PetscFree2(dlens, olens)); 2007 } else { 2008 PetscInt ml, nl; 2009 2010 M = *newmat; 2011 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2012 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2013 PetscCall(MatZeroEntries(M)); 2014 /* 2015 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2016 rather than the slower MatSetValues().
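Flagging the matrix as was_assembled (with assembled cleared) makes the insertion path treat this as refilling a previously assembled matrix, so the existing off-process column map can be reused when the values are inserted.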
2017 */ 2018 M->was_assembled = PETSC_TRUE; 2019 M->assembled = PETSC_FALSE; 2020 } 2021 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2022 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2023 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2024 ii = aij->i; 2025 jj = aij->j; 2026 aa = aij->a; 2027 for (i = 0; i < m; i++) { 2028 row = rstart / bs + i; 2029 nz = ii[i + 1] - ii[i]; 2030 cwork = jj; 2031 jj += nz; 2032 vwork = aa; 2033 aa += nz * bs * bs; 2034 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2035 } 2036 2037 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2038 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2039 *newmat = M; 2040 2041 /* save submatrix used in processor for next request */ 2042 if (call == MAT_INITIAL_MATRIX) { 2043 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2044 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2045 } 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2050 { 2051 MPI_Comm comm, pcomm; 2052 PetscInt clocal_size, nrows; 2053 const PetscInt *rows; 2054 PetscMPIInt size; 2055 IS crowp, lcolp; 2056 2057 PetscFunctionBegin; 2058 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2059 /* make a collective version of 'rowp' */ 2060 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2061 if (pcomm == comm) { 2062 crowp = rowp; 2063 } else { 2064 PetscCall(ISGetSize(rowp, &nrows)); 2065 PetscCall(ISGetIndices(rowp, &rows)); 2066 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2067 PetscCall(ISRestoreIndices(rowp, &rows)); 2068 } 2069 PetscCall(ISSetPermutation(crowp)); 2070 /* make a local version of 'colp' */ 2071 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2072 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2073 if (size == 1) { 2074 lcolp = colp; 2075 } else { 2076 PetscCall(ISAllGather(colp, &lcolp)); 2077 } 2078 PetscCall(ISSetPermutation(lcolp)); 2079 /* now we just get the submatrix */ 2080 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2081 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B)); 2082 /* clean up */ 2083 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2084 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2085 PetscFunctionReturn(PETSC_SUCCESS); 2086 } 2087 2088 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2089 { 2090 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2091 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2092 2093 PetscFunctionBegin; 2094 if (nghosts) *nghosts = B->nbs; 2095 if (ghosts) *ghosts = baij->garray; 2096 PetscFunctionReturn(PETSC_SUCCESS); 2097 } 2098 2099 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2100 { 2101 Mat B; 2102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2103 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2104 Mat_SeqAIJ *b; 2105 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2106 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2107 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2108 2109 PetscFunctionBegin; 2110 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2111 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2112 2113 /* ---------------------------------------------------------------- 2114 Tell every processor the number of 
nonzeros per row 2115 */ 2116 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2117 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2118 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2119 displs = recvcounts + size; 2120 for (i = 0; i < size; i++) { 2121 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2122 displs[i] = A->rmap->range[i] / bs; 2123 } 2124 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2125 /* --------------------------------------------------------------- 2126 Create the sequential matrix of the same type as the local block diagonal 2127 */ 2128 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2129 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2130 PetscCall(MatSetType(B, MATSEQAIJ)); 2131 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2132 b = (Mat_SeqAIJ *)B->data; 2133 2134 /*-------------------------------------------------------------------- 2135 Copy my part of matrix column indices over 2136 */ 2137 sendcount = ad->nz + bd->nz; 2138 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2139 a_jsendbuf = ad->j; 2140 b_jsendbuf = bd->j; 2141 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2142 cnt = 0; 2143 for (i = 0; i < n; i++) { 2144 /* put in lower diagonal portion */ 2145 m = bd->i[i + 1] - bd->i[i]; 2146 while (m > 0) { 2147 /* is it above diagonal (in bd (compressed) numbering) */ 2148 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2149 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 m--; 2151 } 2152 2153 /* put in diagonal portion */ 2154 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2155 2156 /* put in upper diagonal portion */ 2157 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2158 } 2159 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2160 2161 /*-------------------------------------------------------------------- 2162 Gather all column indices to all processors 2163 */ 2164 for (i = 0; i < size; i++) { 2165 recvcounts[i] = 0; 2166 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2167 } 2168 displs[0] = 0; 2169 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2170 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2171 /*-------------------------------------------------------------------- 2172 Assemble the matrix into useable form (note numerical values not yet set) 2173 */ 2174 /* set the b->ilen (length of each row) values */ 2175 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2176 /* set the b->i indices */ 2177 b->i[0] = 0; 2178 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2179 PetscCall(PetscFree(lens)); 2180 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2181 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2182 PetscCall(PetscFree(recvcounts)); 2183 2184 PetscCall(MatPropagateSymmetryOptions(A, B)); 2185 *newmat = B; 2186 PetscFunctionReturn(PETSC_SUCCESS); 2187 } 2188 2189 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal 
fshift, PetscInt its, PetscInt lits, Vec xx) 2190 { 2191 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2192 Vec bb1 = NULL; 2193 2194 PetscFunctionBegin; 2195 if (flag == SOR_APPLY_UPPER) { 2196 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 2200 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2201 2202 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2203 if (flag & SOR_ZERO_INITIAL_GUESS) { 2204 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2205 its--; 2206 } 2207 2208 while (its--) { 2209 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2210 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2211 2212 /* update rhs: bb1 = bb - B*x */ 2213 PetscCall(VecScale(mat->lvec, -1.0)); 2214 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2215 2216 /* local sweep */ 2217 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2218 } 2219 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2220 if (flag & SOR_ZERO_INITIAL_GUESS) { 2221 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2222 its--; 2223 } 2224 while (its--) { 2225 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2226 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2227 2228 /* update rhs: bb1 = bb - B*x */ 2229 PetscCall(VecScale(mat->lvec, -1.0)); 2230 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2231 2232 /* local sweep */ 2233 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2234 } 2235 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2236 if (flag & SOR_ZERO_INITIAL_GUESS) { 2237 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2238 its--; 2239 } 2240 while (its--) { 2241 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2242 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2243 2244 /* update rhs: bb1 = bb - B*x */ 2245 PetscCall(VecScale(mat->lvec, -1.0)); 2246 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2247 2248 /* local sweep */ 2249 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2250 } 2251 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2252 2253 PetscCall(VecDestroy(&bb1)); 2254 PetscFunctionReturn(PETSC_SUCCESS); 2255 } 2256 2257 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2258 { 2259 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2260 PetscInt m, N, i, *garray = aij->garray; 2261 PetscInt ib, jb, bs = A->rmap->bs; 2262 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2263 MatScalar *a_val = a_aij->a; 2264 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2265 MatScalar *b_val = b_aij->a; 2266 PetscReal *work; 2267 2268 PetscFunctionBegin; 2269 PetscCall(MatGetSize(A, &m, &N)); 2270 PetscCall(PetscCalloc1(N, &work)); 2271 if (type == NORM_2) { 2272 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2273 for (jb = 0; jb < bs; jb++) { 2274 for (ib = 0; ib < bs; ib++) { 2275 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2276 a_val++; 2277 } 2278 } 2279 
} 2280 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2281 for (jb = 0; jb < bs; jb++) { 2282 for (ib = 0; ib < bs; ib++) { 2283 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2284 b_val++; 2285 } 2286 } 2287 } 2288 } else if (type == NORM_1) { 2289 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2290 for (jb = 0; jb < bs; jb++) { 2291 for (ib = 0; ib < bs; ib++) { 2292 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2293 a_val++; 2294 } 2295 } 2296 } 2297 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2298 for (jb = 0; jb < bs; jb++) { 2299 for (ib = 0; ib < bs; ib++) { 2300 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2301 b_val++; 2302 } 2303 } 2304 } 2305 } else if (type == NORM_INFINITY) { 2306 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2307 for (jb = 0; jb < bs; jb++) { 2308 for (ib = 0; ib < bs; ib++) { 2309 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2310 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2311 a_val++; 2312 } 2313 } 2314 } 2315 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2316 for (jb = 0; jb < bs; jb++) { 2317 for (ib = 0; ib < bs; ib++) { 2318 int col = garray[b_aij->j[i]] * bs + jb; 2319 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2320 b_val++; 2321 } 2322 } 2323 } 2324 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2325 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2326 for (jb = 0; jb < bs; jb++) { 2327 for (ib = 0; ib < bs; ib++) { 2328 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2329 a_val++; 2330 } 2331 } 2332 } 2333 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2334 for (jb = 0; jb < bs; jb++) { 2335 for (ib = 0; ib < bs; ib++) { 2336 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2337 b_val++; 2338 } 2339 } 2340 } 2341 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2342 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2343 for (jb = 0; jb < bs; jb++) { 2344 for (ib = 0; ib < bs; ib++) { 2345 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2346 a_val++; 2347 } 2348 } 2349 } 2350 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2351 for (jb = 0; jb < bs; jb++) { 2352 for (ib = 0; ib < bs; ib++) { 2353 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2354 b_val++; 2355 } 2356 } 2357 } 2358 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2359 if (type == NORM_INFINITY) { 2360 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2361 } else { 2362 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2363 } 2364 PetscCall(PetscFree(work)); 2365 if (type == NORM_2) { 2366 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2367 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2368 for (i = 0; i < N; i++) reductions[i] /= m; 2369 } 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2374 { 2375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2376 2377 PetscFunctionBegin; 2378 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2379 A->factorerrortype = 
a->A->factorerrortype; 2380 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2381 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2382 PetscFunctionReturn(PETSC_SUCCESS); 2383 } 2384 2385 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2386 { 2387 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2388 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2389 2390 PetscFunctionBegin; 2391 if (!Y->preallocated) { 2392 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2393 } else if (!aij->nz) { 2394 PetscInt nonew = aij->nonew; 2395 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2396 aij->nonew = nonew; 2397 } 2398 PetscCall(MatShift_Basic(Y, a)); 2399 PetscFunctionReturn(PETSC_SUCCESS); 2400 } 2401 2402 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2403 { 2404 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2405 2406 PetscFunctionBegin; 2407 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2408 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2409 if (d) { 2410 PetscInt rstart; 2411 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2412 *d += rstart / A->rmap->bs; 2413 } 2414 PetscFunctionReturn(PETSC_SUCCESS); 2415 } 2416 2417 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2418 { 2419 PetscFunctionBegin; 2420 *a = ((Mat_MPIBAIJ *)A->data)->A; 2421 PetscFunctionReturn(PETSC_SUCCESS); 2422 } 2423 2424 /* -------------------------------------------------------------------*/ 2425 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2426 MatGetRow_MPIBAIJ, 2427 MatRestoreRow_MPIBAIJ, 2428 MatMult_MPIBAIJ, 2429 /* 4*/ MatMultAdd_MPIBAIJ, 2430 MatMultTranspose_MPIBAIJ, 2431 MatMultTransposeAdd_MPIBAIJ, 2432 NULL, 2433 NULL, 2434 NULL, 2435 /*10*/ NULL, 2436 NULL, 2437 NULL, 2438 MatSOR_MPIBAIJ, 2439 MatTranspose_MPIBAIJ, 2440 /*15*/ MatGetInfo_MPIBAIJ, 2441 MatEqual_MPIBAIJ, 2442 MatGetDiagonal_MPIBAIJ, 2443 MatDiagonalScale_MPIBAIJ, 2444 MatNorm_MPIBAIJ, 2445 /*20*/ MatAssemblyBegin_MPIBAIJ, 2446 MatAssemblyEnd_MPIBAIJ, 2447 MatSetOption_MPIBAIJ, 2448 MatZeroEntries_MPIBAIJ, 2449 /*24*/ MatZeroRows_MPIBAIJ, 2450 NULL, 2451 NULL, 2452 NULL, 2453 NULL, 2454 /*29*/ MatSetUp_MPI_Hash, 2455 NULL, 2456 NULL, 2457 MatGetDiagonalBlock_MPIBAIJ, 2458 NULL, 2459 /*34*/ MatDuplicate_MPIBAIJ, 2460 NULL, 2461 NULL, 2462 NULL, 2463 NULL, 2464 /*39*/ MatAXPY_MPIBAIJ, 2465 MatCreateSubMatrices_MPIBAIJ, 2466 MatIncreaseOverlap_MPIBAIJ, 2467 MatGetValues_MPIBAIJ, 2468 MatCopy_MPIBAIJ, 2469 /*44*/ NULL, 2470 MatScale_MPIBAIJ, 2471 MatShift_MPIBAIJ, 2472 NULL, 2473 MatZeroRowsColumns_MPIBAIJ, 2474 /*49*/ NULL, 2475 NULL, 2476 NULL, 2477 NULL, 2478 NULL, 2479 /*54*/ MatFDColoringCreate_MPIXAIJ, 2480 NULL, 2481 MatSetUnfactored_MPIBAIJ, 2482 MatPermute_MPIBAIJ, 2483 MatSetValuesBlocked_MPIBAIJ, 2484 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2485 MatDestroy_MPIBAIJ, 2486 MatView_MPIBAIJ, 2487 NULL, 2488 NULL, 2489 /*64*/ NULL, 2490 NULL, 2491 NULL, 2492 NULL, 2493 NULL, 2494 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2495 NULL, 2496 NULL, 2497 NULL, 2498 NULL, 2499 /*74*/ NULL, 2500 MatFDColoringApply_BAIJ, 2501 NULL, 2502 NULL, 2503 NULL, 2504 /*79*/ NULL, 2505 NULL, 2506 NULL, 2507 NULL, 2508 MatLoad_MPIBAIJ, 2509 /*84*/ NULL, 2510 NULL, 2511 NULL, 2512 NULL, 2513 NULL, 2514 /*89*/ NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 NULL, 2519 /*94*/ NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 NULL, 2524 /*99*/ NULL, 2525 NULL, 2526 NULL, 2527 MatConjugate_MPIBAIJ, 
2528 NULL, 2529 /*104*/ NULL, 2530 MatRealPart_MPIBAIJ, 2531 MatImaginaryPart_MPIBAIJ, 2532 NULL, 2533 NULL, 2534 /*109*/ NULL, 2535 NULL, 2536 NULL, 2537 NULL, 2538 MatMissingDiagonal_MPIBAIJ, 2539 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2540 NULL, 2541 MatGetGhosts_MPIBAIJ, 2542 NULL, 2543 NULL, 2544 /*119*/ NULL, 2545 NULL, 2546 NULL, 2547 NULL, 2548 MatGetMultiProcBlock_MPIBAIJ, 2549 /*124*/ NULL, 2550 MatGetColumnReductions_MPIBAIJ, 2551 MatInvertBlockDiagonal_MPIBAIJ, 2552 NULL, 2553 NULL, 2554 /*129*/ NULL, 2555 NULL, 2556 NULL, 2557 NULL, 2558 NULL, 2559 /*134*/ NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 NULL, 2564 /*139*/ MatSetBlockSizes_Default, 2565 NULL, 2566 NULL, 2567 MatFDColoringSetUp_MPIXAIJ, 2568 NULL, 2569 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2570 NULL, 2571 NULL, 2572 NULL, 2573 NULL, 2574 NULL, 2575 /*150*/ NULL, 2576 NULL}; 2577 2578 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2579 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2580 2581 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2582 { 2583 PetscInt m, rstart, cstart, cend; 2584 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2585 const PetscInt *JJ = NULL; 2586 PetscScalar *values = NULL; 2587 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2588 PetscBool nooffprocentries; 2589 2590 PetscFunctionBegin; 2591 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2592 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2593 PetscCall(PetscLayoutSetUp(B->rmap)); 2594 PetscCall(PetscLayoutSetUp(B->cmap)); 2595 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2596 m = B->rmap->n / bs; 2597 rstart = B->rmap->rstart / bs; 2598 cstart = B->cmap->rstart / bs; 2599 cend = B->cmap->rend / bs; 2600 2601 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2602 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2603 for (i = 0; i < m; i++) { 2604 nz = ii[i + 1] - ii[i]; 2605 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2606 nz_max = PetscMax(nz_max, nz); 2607 dlen = 0; 2608 olen = 0; 2609 JJ = jj + ii[i]; 2610 for (j = 0; j < nz; j++) { 2611 if (*JJ < cstart || *JJ >= cend) olen++; 2612 else dlen++; 2613 JJ++; 2614 } 2615 d_nnz[i] = dlen; 2616 o_nnz[i] = olen; 2617 } 2618 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2619 PetscCall(PetscFree2(d_nnz, o_nnz)); 2620 2621 values = (PetscScalar *)V; 2622 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2623 for (i = 0; i < m; i++) { 2624 PetscInt row = i + rstart; 2625 PetscInt ncols = ii[i + 1] - ii[i]; 2626 const PetscInt *icols = jj + ii[i]; 2627 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2628 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2629 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2630 } else { /* block ordering does not match so we can only insert one block at a time. */ 2631 PetscInt j; 2632 for (j = 0; j < ncols; j++) { 2633 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2634 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2635 } 2636 } 2637 } 2638 2639 if (!V) PetscCall(PetscFree(values)); 2640 nooffprocentries = B->nooffprocentries; 2641 B->nooffprocentries = PETSC_TRUE; 2642 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2643 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2644 B->nooffprocentries = nooffprocentries; 2645 2646 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 /*@C 2651 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2652 2653 Collective 2654 2655 Input Parameters: 2656 + B - the matrix 2657 . bs - the block size 2658 . i - the indices into j for the start of each local row (starts with zero) 2659 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2660 - v - optional values in the matrix 2661 2662 Level: advanced 2663 2664 Notes: 2665 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2666 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2667 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2668 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2669 block column and the second index is over columns within a block. 2670 2671 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2672 2673 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2674 @*/ 2675 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2676 { 2677 PetscFunctionBegin; 2678 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2679 PetscValidType(B, 1); 2680 PetscValidLogicalCollectiveInt(B, bs, 2); 2681 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2682 PetscFunctionReturn(PETSC_SUCCESS); 2683 } 2684 2685 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2686 { 2687 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2688 PetscInt i; 2689 PetscMPIInt size; 2690 2691 PetscFunctionBegin; 2692 if (B->hash_active) { 2693 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2694 B->hash_active = PETSC_FALSE; 2695 } 2696 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2697 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2698 PetscCall(PetscLayoutSetUp(B->rmap)); 2699 PetscCall(PetscLayoutSetUp(B->cmap)); 2700 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2701 2702 if (d_nnz) { 2703 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2704 } 2705 if (o_nnz) { 2706 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 
0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2707 } 2708 2709 b->bs2 = bs * bs; 2710 b->mbs = B->rmap->n / bs; 2711 b->nbs = B->cmap->n / bs; 2712 b->Mbs = B->rmap->N / bs; 2713 b->Nbs = B->cmap->N / bs; 2714 2715 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2716 b->rstartbs = B->rmap->rstart / bs; 2717 b->rendbs = B->rmap->rend / bs; 2718 b->cstartbs = B->cmap->rstart / bs; 2719 b->cendbs = B->cmap->rend / bs; 2720 2721 #if defined(PETSC_USE_CTABLE) 2722 PetscCall(PetscHMapIDestroy(&b->colmap)); 2723 #else 2724 PetscCall(PetscFree(b->colmap)); 2725 #endif 2726 PetscCall(PetscFree(b->garray)); 2727 PetscCall(VecDestroy(&b->lvec)); 2728 PetscCall(VecScatterDestroy(&b->Mvctx)); 2729 2730 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2731 PetscCall(MatDestroy(&b->B)); 2732 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2733 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2734 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2735 2736 PetscCall(MatDestroy(&b->A)); 2737 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2738 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2739 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2740 2741 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2742 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2743 B->preallocated = PETSC_TRUE; 2744 B->was_assembled = PETSC_FALSE; 2745 B->assembled = PETSC_FALSE; 2746 PetscFunctionReturn(PETSC_SUCCESS); 2747 } 2748 2749 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2750 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2751 2752 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2753 { 2754 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2755 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2756 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2757 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2758 2759 PetscFunctionBegin; 2760 PetscCall(PetscMalloc1(M + 1, &ii)); 2761 ii[0] = 0; 2762 for (i = 0; i < M; i++) { 2763 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2764 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2765 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2766 /* remove one from count of matrix has diagonal */ 2767 for (j = id[i]; j < id[i + 1]; j++) { 2768 if (jd[j] == i) { 2769 ii[i + 1]--; 2770 break; 2771 } 2772 } 2773 } 2774 PetscCall(PetscMalloc1(ii[M], &jj)); 2775 cnt = 0; 2776 for (i = 0; i < M; i++) { 2777 for (j = io[i]; j < io[i + 1]; j++) { 2778 if (garray[jo[j]] > rstart) break; 2779 jj[cnt++] = garray[jo[j]]; 2780 } 2781 for (k = id[i]; k < id[i + 1]; k++) { 2782 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2783 } 2784 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2785 } 2786 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2787 PetscFunctionReturn(PETSC_SUCCESS); 2788 } 2789 2790 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2791 2792 PETSC_INTERN PetscErrorCode 
MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2793 2794 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2795 { 2796 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2797 Mat_MPIAIJ *b; 2798 Mat B; 2799 2800 PetscFunctionBegin; 2801 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2802 2803 if (reuse == MAT_REUSE_MATRIX) { 2804 B = *newmat; 2805 } else { 2806 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2807 PetscCall(MatSetType(B, MATMPIAIJ)); 2808 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2809 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2810 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2811 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2812 } 2813 b = (Mat_MPIAIJ *)B->data; 2814 2815 if (reuse == MAT_REUSE_MATRIX) { 2816 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2817 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2818 } else { 2819 PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd; 2820 PetscCall(MatDestroy(&b->A)); 2821 PetscCall(MatDestroy(&b->B)); 2822 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2823 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2824 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2825 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2826 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2827 A->symmetric = sym; 2828 A->hermitian = hermitian; 2829 A->structurally_symmetric = structurally_symmetric; 2830 A->spd = spd; 2831 } 2832 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2833 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2834 2835 if (reuse == MAT_INPLACE_MATRIX) { 2836 PetscCall(MatHeaderReplace(A, &B)); 2837 } else { 2838 *newmat = B; 2839 } 2840 PetscFunctionReturn(PETSC_SUCCESS); 2841 } 2842 2843 /*MC 2844 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2845 2846 Options Database Keys: 2847 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2848 . -mat_block_size <bs> - set the blocksize used to store the matrix 2849 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2850 - -mat_use_hash_table <fact> - set hash table factor 2851 2852 Level: beginner 2853 2854 Note: 2855 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this case no 2856 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2857 2858 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()` 2859 M*/ 2860 2861 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2862 2863 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2864 { 2865 Mat_MPIBAIJ *b; 2866 PetscBool flg = PETSC_FALSE; 2867 2868 PetscFunctionBegin; 2869 PetscCall(PetscNew(&b)); 2870 B->data = (void *)b; 2871 2872 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2873 B->assembled = PETSC_FALSE; 2874 2875 B->insertmode = NOT_SET_VALUES; 2876 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2877 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2878 2879 /* build local table of row and column ownerships */ 2880 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2881 2882 /* build cache for off array entries formed */ 2883 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2884 2885 b->donotstash = PETSC_FALSE; 2886 b->colmap = NULL; 2887 b->garray = NULL; 2888 b->roworiented = PETSC_TRUE; 2889 2890 /* stuff used in block assembly */ 2891 b->barray = NULL; 2892 2893 /* stuff used for matrix vector multiply */ 2894 b->lvec = NULL; 2895 b->Mvctx = NULL; 2896 2897 /* stuff for MatGetRow() */ 2898 b->rowindices = NULL; 2899 b->rowvalues = NULL; 2900 b->getrowactive = PETSC_FALSE; 2901 2902 /* hash table stuff */ 2903 b->ht = NULL; 2904 b->hd = NULL; 2905 b->ht_size = 0; 2906 b->ht_flag = PETSC_FALSE; 2907 b->ht_fact = 0; 2908 b->ht_total_ct = 0; 2909 b->ht_insert_ct = 0; 2910 2911 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2912 b->ijonly = PETSC_FALSE; 2913 2914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2917 #if defined(PETSC_HAVE_HYPRE) 2918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2919 #endif 2920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2927 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2928 2929 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2930 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2931 if (flg) { 2932 PetscReal fact = 1.39; 2933 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2934
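/* the same option is read again below to pick up its numerical value; a factor <= 1.0 falls back to the default of 1.39 */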
PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2935 if (fact <= 1.0) fact = 1.39; 2936 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2937 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2938 } 2939 PetscOptionsEnd(); 2940 PetscFunctionReturn(PETSC_SUCCESS); 2941 } 2942 2943 /*MC 2944 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2945 2946 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2947 and `MATMPIBAIJ` otherwise. 2948 2949 Options Database Keys: 2950 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2951 2952 Level: beginner 2953 2954 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2955 M*/ 2956 2957 /*@C 2958 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2959 (block compressed row). 2960 2961 Collective 2962 2963 Input Parameters: 2964 + B - the matrix 2965 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2966 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2967 . d_nz - number of block nonzeros per block row in diagonal portion of local 2968 submatrix (same for all local rows) 2969 . d_nnz - array containing the number of block nonzeros in the various block rows 2970 of the diagonal portion of the local submatrix (possibly different for each block 2971 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2972 set it even if it is zero. 2973 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2974 submatrix (same for all local rows). 2975 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2976 off-diagonal portion of the local submatrix (possibly different for 2977 each block row) or `NULL`. 2978 2979 If the *_nnz parameter is given then the *_nz parameter is ignored 2980 2981 Options Database Keys: 2982 + -mat_block_size - size of the blocks to use 2983 - -mat_use_hash_table <fact> - set hash table factor 2984 2985 Level: intermediate 2986 2987 Notes: 2988 For good matrix assembly performance 2989 the user should preallocate the matrix storage by setting the parameters 2990 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 2991 performance can be increased by more than a factor of 50. 2992 2993 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2994 then it must be used on all processors that share the object for that argument. 2995 2996 Storage Information: 2997 For a square global matrix we define each processor's diagonal portion 2998 to be its local rows and the corresponding columns (a square submatrix); 2999 each processor's off-diagonal portion encompasses the remainder of the 3000 local matrix (a rectangular submatrix). 3001 3002 The user can specify preallocated storage for the diagonal part of 3003 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3004 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3005 memory allocation.
Likewise, specify preallocated storage for the 3006 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3007 3008 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3009 the figure below we depict these three local rows and all columns (0-11). 3010 3011 .vb 3012 0 1 2 3 4 5 6 7 8 9 10 11 3013 -------------------------- 3014 row 3 |o o o d d d o o o o o o 3015 row 4 |o o o d d d o o o o o o 3016 row 5 |o o o d d d o o o o o o 3017 -------------------------- 3018 .ve 3019 3020 Thus, any entries in the d locations are stored in the d (diagonal) 3021 submatrix, and any entries in the o locations are stored in the 3022 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3023 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3024 3025 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3026 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3027 In general, for PDE problems in which most nonzeros are near the diagonal, 3028 one expects `d_nz` >> `o_nz`. For large problems you MUST preallocate memory 3029 or you will get TERRIBLE performance; see the users' manual chapter on 3030 matrices. 3031 3032 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3033 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3034 You can also run with the option `-info` and look for messages with the string 3035 malloc in them to see if additional memory allocation was needed. 3036 3037 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3038 @*/ 3039 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3040 { 3041 PetscFunctionBegin; 3042 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3043 PetscValidType(B, 1); 3044 PetscValidLogicalCollectiveInt(B, bs, 2); 3045 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3046 PetscFunctionReturn(PETSC_SUCCESS); 3047 } 3048 3049 /*@C 3050 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3051 (block compressed row). 3052 3053 Collective 3054 3055 Input Parameters: 3056 + comm - MPI communicator 3057 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3058 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3059 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3060 This value should be the same as the local size used in creating the 3061 y vector for the matrix-vector product y = Ax. 3062 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3063 This value should be the same as the local size used in creating the 3064 x vector for the matrix-vector product y = Ax. 3065 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3066 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3067 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3068 submatrix (same for all local rows) 3069 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3070 of the diagonal portion of the local submatrix (possibly different for each block 3071 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry 3072 and set it even if it is zero. 3073 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3074 submatrix (same for all local rows). 3075 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3076 off-diagonal portion of the local submatrix (possibly different for 3077 each block row) or `NULL`. 3078 3079 Output Parameter: 3080 . A - the matrix 3081 3082 Options Database Keys: 3083 + -mat_block_size - size of the blocks to use 3084 - -mat_use_hash_table <fact> - set hash table factor 3085 3086 Level: intermediate 3087 3088 Notes: 3089 For good matrix assembly performance 3090 the user should preallocate the matrix storage by setting the parameters 3091 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3092 performance can be increased by more than a factor of 50. 3093 3094 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3095 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3096 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3097 3098 If the *_nnz parameter is given then the *_nz parameter is ignored. 3099 3100 A nonzero block is any block that has 1 or more nonzeros in it. 3101 3102 The user MUST specify either the local or global matrix dimensions 3103 (possibly both). 3104 3105 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3106 then it must be used on all processors that share the object for that argument. 3107 3108 Storage Information: 3109 For a square global matrix we define each processor's diagonal portion 3110 to be its local rows and the corresponding columns (a square submatrix); 3111 each processor's off-diagonal portion encompasses the remainder of the 3112 local matrix (a rectangular submatrix). 3113 3114 The user can specify preallocated storage for the diagonal part of 3115 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3116 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3117 memory allocation. Likewise, specify preallocated storage for the 3118 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3119 3120 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3121 the figure below we depict these three local rows and all columns (0-11). 3122 3123 .vb 3124 0 1 2 3 4 5 6 7 8 9 10 11 3125 -------------------------- 3126 row 3 |o o o d d d o o o o o o 3127 row 4 |o o o d d d o o o o o o 3128 row 5 |o o o d d d o o o o o o 3129 -------------------------- 3130 .ve 3131 3132 Thus, any entries in the d locations are stored in the d (diagonal) 3133 submatrix, and any entries in the o locations are stored in the 3134 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3135 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3136 3137 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3138 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3139 In general, for PDE problems in which most nonzeros are near the diagonal, 3140 one expects `d_nz` >> `o_nz`.
For large problems you MUST preallocate memory 3141 or you will get TERRIBLE performance; see the users' manual chapter on 3142 matrices. 3143 3144 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3145 @*/ 3146 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3147 { 3148 PetscMPIInt size; 3149 3150 PetscFunctionBegin; 3151 PetscCall(MatCreate(comm, A)); 3152 PetscCall(MatSetSizes(*A, m, n, M, N)); 3153 PetscCallMPI(MPI_Comm_size(comm, &size)); 3154 if (size > 1) { 3155 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3156 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3157 } else { 3158 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3159 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3160 } 3161 PetscFunctionReturn(PETSC_SUCCESS); 3162 } 3163 3164 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3165 { 3166 Mat mat; 3167 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3168 PetscInt len = 0; 3169 3170 PetscFunctionBegin; 3171 *newmat = NULL; 3172 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3173 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3174 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3175 3176 mat->factortype = matin->factortype; 3177 mat->preallocated = PETSC_TRUE; 3178 mat->assembled = PETSC_TRUE; 3179 mat->insertmode = NOT_SET_VALUES; 3180 3181 a = (Mat_MPIBAIJ *)mat->data; 3182 mat->rmap->bs = matin->rmap->bs; 3183 a->bs2 = oldmat->bs2; 3184 a->mbs = oldmat->mbs; 3185 a->nbs = oldmat->nbs; 3186 a->Mbs = oldmat->Mbs; 3187 a->Nbs = oldmat->Nbs; 3188 3189 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3190 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3191 3192 a->size = oldmat->size; 3193 a->rank = oldmat->rank; 3194 a->donotstash = oldmat->donotstash; 3195 a->roworiented = oldmat->roworiented; 3196 a->rowindices = NULL; 3197 a->rowvalues = NULL; 3198 a->getrowactive = PETSC_FALSE; 3199 a->barray = NULL; 3200 a->rstartbs = oldmat->rstartbs; 3201 a->rendbs = oldmat->rendbs; 3202 a->cstartbs = oldmat->cstartbs; 3203 a->cendbs = oldmat->cendbs; 3204 3205 /* hash table stuff */ 3206 a->ht = NULL; 3207 a->hd = NULL; 3208 a->ht_size = 0; 3209 a->ht_flag = oldmat->ht_flag; 3210 a->ht_fact = oldmat->ht_fact; 3211 a->ht_total_ct = 0; 3212 a->ht_insert_ct = 0; 3213 3214 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3215 if (oldmat->colmap) { 3216 #if defined(PETSC_USE_CTABLE) 3217 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3218 #else 3219 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3220 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3221 #endif 3222 } else a->colmap = NULL; 3223 3224 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 3225 PetscCall(PetscMalloc1(len, &a->garray)); 3226 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3227 } else a->garray = NULL; 3228 3229 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3230 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3231 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3232 3233 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3234 PetscCall(MatDuplicate(oldmat->B, 
cpvalues, &a->B)); 3235 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3236 *newmat = mat; 3237 PetscFunctionReturn(PETSC_SUCCESS); 3238 } 3239 3240 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3241 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3242 { 3243 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3244 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3245 PetscScalar *matvals; 3246 3247 PetscFunctionBegin; 3248 PetscCall(PetscViewerSetUp(viewer)); 3249 3250 /* read in matrix header */ 3251 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3252 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3253 M = header[1]; 3254 N = header[2]; 3255 nz = header[3]; 3256 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3257 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3258 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3259 3260 /* set block sizes from the viewer's .info file */ 3261 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3262 /* set local sizes if not set already */ 3263 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3264 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3265 /* set global sizes if not set already */ 3266 if (mat->rmap->N < 0) mat->rmap->N = M; 3267 if (mat->cmap->N < 0) mat->cmap->N = N; 3268 PetscCall(PetscLayoutSetUp(mat->rmap)); 3269 PetscCall(PetscLayoutSetUp(mat->cmap)); 3270 3271 /* check if the matrix sizes are correct */ 3272 PetscCall(MatGetSize(mat, &rows, &cols)); 3273 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3274 PetscCall(MatGetBlockSize(mat, &bs)); 3275 PetscCall(MatGetLocalSize(mat, &m, &n)); 3276 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3277 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3278 mbs = m / bs; 3279 nbs = n / bs; 3280 3281 /* read in row lengths and build row indices */ 3282 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3283 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3284 rowidxs[0] = 0; 3285 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3286 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3287 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3288 3289 /* read in column indices and matrix values */ 3290 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3291 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3292 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3293 3294 { /* preallocate matrix storage */ 3295 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3296 PetscHSetI ht; /* helper hash set to count off-diagonal 
nonzeros */ 3297 PetscBool sbaij, done; 3298 PetscInt *d_nnz, *o_nnz; 3299 3300 PetscCall(PetscBTCreate(nbs, &bt)); 3301 PetscCall(PetscHSetICreate(&ht)); 3302 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3303 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3304 for (i = 0; i < mbs; i++) { 3305 PetscCall(PetscBTMemzero(nbs, bt)); 3306 PetscCall(PetscHSetIClear(ht)); 3307 for (k = 0; k < bs; k++) { 3308 PetscInt row = bs * i + k; 3309 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3310 PetscInt col = colidxs[j]; 3311 if (!sbaij || col >= row) { 3312 if (col >= cs && col < ce) { 3313 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3314 } else { 3315 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3316 if (done) o_nnz[i]++; 3317 } 3318 } 3319 } 3320 } 3321 } 3322 PetscCall(PetscBTDestroy(&bt)); 3323 PetscCall(PetscHSetIDestroy(&ht)); 3324 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3325 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3326 PetscCall(PetscFree2(d_nnz, o_nnz)); 3327 } 3328 3329 /* store matrix values */ 3330 for (i = 0; i < m; i++) { 3331 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3332 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3333 } 3334 3335 PetscCall(PetscFree(rowidxs)); 3336 PetscCall(PetscFree2(colidxs, matvals)); 3337 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3338 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3339 PetscFunctionReturn(PETSC_SUCCESS); 3340 } 3341 3342 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3343 { 3344 PetscBool isbinary; 3345 3346 PetscFunctionBegin; 3347 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3348 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3349 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3350 PetscFunctionReturn(PETSC_SUCCESS); 3351 } 3352 3353 /*@ 3354 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3355 3356 Input Parameters: 3357 + mat - the matrix 3358 - fact - factor 3359 3360 Options Database Key: 3361 . 
-mat_use_hash_table <fact> - provide the factor 3362 3363 Level: advanced 3364 3365 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3366 @*/ 3367 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3368 { 3369 PetscFunctionBegin; 3370 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3371 PetscFunctionReturn(PETSC_SUCCESS); 3372 } 3373 3374 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3375 { 3376 Mat_MPIBAIJ *baij; 3377 3378 PetscFunctionBegin; 3379 baij = (Mat_MPIBAIJ *)mat->data; 3380 baij->ht_fact = fact; 3381 PetscFunctionReturn(PETSC_SUCCESS); 3382 } 3383 3384 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3385 { 3386 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3387 PetscBool flg; 3388 3389 PetscFunctionBegin; 3390 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3391 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3392 if (Ad) *Ad = a->A; 3393 if (Ao) *Ao = a->B; 3394 if (colmap) *colmap = a->garray; 3395 PetscFunctionReturn(PETSC_SUCCESS); 3396 } 3397 3398 /* 3399 Special version for direct calls from Fortran (to eliminate two function call overheads) 3400 */ 3401 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3402 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3403 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3404 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3405 #endif 3406 3407 /*@C 3408 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3409 3410 Collective 3411 3412 Input Parameters: 3413 + mat - the matrix 3414 . min - number of input rows 3415 . im - input rows 3416 . nin - number of input columns 3417 . in - input columns 3418 . v - numerical values input 3419 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3420 3421 Developer Note: 3422 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()`, which is terrible code un-reuse.
3423 3424 Level: advanced 3425 3426 .seealso: `Mat`, `MatSetValuesBlocked()` 3427 @*/ 3428 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3429 { 3430 /* convert input arguments to C version */ 3431 Mat mat = *matin; 3432 PetscInt m = *min, n = *nin; 3433 InsertMode addv = *addvin; 3434 3435 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3436 const MatScalar *value; 3437 MatScalar *barray = baij->barray; 3438 PetscBool roworiented = baij->roworiented; 3439 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3440 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3441 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3442 3443 PetscFunctionBegin; 3444 /* tasks normally handled by MatSetValuesBlocked() */ 3445 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3446 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3447 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3448 if (mat->assembled) { 3449 mat->was_assembled = PETSC_TRUE; 3450 mat->assembled = PETSC_FALSE; 3451 } 3452 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3453 3454 if (!barray) { 3455 PetscCall(PetscMalloc1(bs2, &barray)); 3456 baij->barray = barray; 3457 } 3458 3459 if (roworiented) stepval = (n - 1) * bs; 3460 else stepval = (m - 1) * bs; 3461 3462 for (i = 0; i < m; i++) { 3463 if (im[i] < 0) continue; 3464 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3465 if (im[i] >= rstart && im[i] < rend) { 3466 row = im[i] - rstart; 3467 for (j = 0; j < n; j++) { 3468 /* If NumCol = 1 then a copy is not required */ 3469 if ((roworiented) && (n == 1)) { 3470 barray = (MatScalar *)v + i * bs2; 3471 } else if ((!roworiented) && (m == 1)) { 3472 barray = (MatScalar *)v + j * bs2; 3473 } else { /* Here a copy is required */ 3474 if (roworiented) { 3475 value = v + i * (stepval + bs) * bs + j * bs; 3476 } else { 3477 value = v + j * (stepval + bs) * bs + i * bs; 3478 } 3479 for (ii = 0; ii < bs; ii++, value += stepval) { 3480 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3481 } 3482 barray -= bs2; 3483 } 3484 3485 if (in[j] >= cstart && in[j] < cend) { 3486 col = in[j] - cstart; 3487 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3488 } else if (in[j] < 0) { 3489 continue; 3490 } else { 3491 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3492 if (mat->was_assembled) { 3493 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3494 3495 #if defined(PETSC_USE_DEBUG) 3496 #if defined(PETSC_USE_CTABLE) 3497 { 3498 PetscInt data; 3499 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3500 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3501 } 3502 #else 3503 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3504 #endif 3505 #endif 3506 #if defined(PETSC_USE_CTABLE) 3507 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3508 col = (col - 1) / bs; 3509 #else 3510 col = (baij->colmap[in[j]] - 1) / bs; 3511 #endif 3512 if (col < 0 
&& !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 3513 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3514 col = in[j]; 3515 } 3516 } else col = in[j]; 3517 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3518 } 3519 } 3520 } else { 3521 if (!baij->donotstash) { 3522 if (roworiented) { 3523 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3524 } else { 3525 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3526 } 3527 } 3528 } 3529 } 3530 3531 /* task normally handled by MatSetValuesBlocked() */ 3532 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3533 PetscFunctionReturn(PETSC_SUCCESS); 3534 } 3535 3536 /*@ 3537 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block 3538 CSR format. 3539 3540 Collective 3541 3542 Input Parameters: 3543 + comm - MPI communicator 3544 . bs - the block size, only a block size of 1 is supported 3545 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3546 . n - This value should be the same as the local size used in creating the 3547 x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it 3548 calculated if N is given). For square matrices n is almost always m. 3549 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3550 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3551 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in the row-th block row of the matrix 3552 . j - column indices 3553 - a - matrix values 3554 3555 Output Parameter: 3556 . mat - the matrix 3557 3558 Level: intermediate 3559 3560 Notes: 3561 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3562 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3563 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3564 3565 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3566 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3567 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3568 with column-major ordering within blocks. 3569 3570 The `i` and `j` indices are 0 based, and the `i` offsets are indices into the local `j` array.
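   As a minimal single-process sketch (not part of the original manual page; the sizes and values are
   invented for illustration), the arrays for a 3 x 3 matrix with bs = 1 and nonzeros at (0,0), (0,1),
   (1,1), (2,0) and (2,2) would be
.vb
   PetscInt    i[] = {0, 2, 3, 5};              /* i[row+1] - i[row] = number of blocks in block row "row" */
   PetscInt    j[] = {0, 1, 1, 0, 2};           /* block column indices, row by row */
   PetscScalar a[] = {1.0, 2.0, 3.0, 4.0, 5.0}; /* bs*bs values per block, column-major within each block */
   Mat         A;

   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve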
3571 3572 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3573 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3574 @*/ 3575 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3576 { 3577 PetscFunctionBegin; 3578 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3579 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3580 PetscCall(MatCreate(comm, mat)); 3581 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3582 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3583 PetscCall(MatSetBlockSize(*mat, bs)); 3584 PetscCall(MatSetUp(*mat)); 3585 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3586 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3587 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3588 PetscFunctionReturn(PETSC_SUCCESS); 3589 } 3590 3591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3592 { 3593 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3594 PetscInt *indx; 3595 PetscScalar *values; 3596 3597 PetscFunctionBegin; 3598 PetscCall(MatGetSize(inmat, &m, &N)); 3599 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3600 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3601 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3602 PetscInt *bindx, rmax = a->rmax, j; 3603 PetscMPIInt rank, size; 3604 3605 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3606 mbs = m / bs; 3607 Nbs = N / cbs; 3608 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3609 nbs = n / cbs; 3610 3611 PetscCall(PetscMalloc1(rmax, &bindx)); 3612 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3613 3614 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3615 PetscCallMPI(MPI_Comm_size(comm, &size)); 3616 if (rank == size - 1) { 3617 /* Check sum(nbs) = Nbs */ 3618 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3619 } 3620 3621 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3622 for (i = 0; i < mbs; i++) { 3623 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3624 nnz = nnz / bs; 3625 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3626 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3627 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3628 } 3629 PetscCall(PetscFree(bindx)); 3630 3631 PetscCall(MatCreate(comm, outmat)); 3632 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3633 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3634 PetscCall(MatSetType(*outmat, MATBAIJ)); 3635 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3636 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3637 MatPreallocateEnd(dnz, onz); 3638 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3639 } 3640 3641 /* numeric phase */ 3642 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3643 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3644 3645 for (i = 0; i < m; i++) { 3646
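    /* insert each unblocked local row of inmat into the corresponding global row of *outmat */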
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3647 Ii = i + rstart; 3648 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3649 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3650 } 3651 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3652 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3653 PetscFunctionReturn(PETSC_SUCCESS); 3654 } 3655
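/*
   A hypothetical usage sketch (not taken from the PETSc sources): the routine above is normally reached
   through the type-independent dispatcher MatCreateMPIMatConcatenateSeqMat(), which stacks the sequential
   matrix supplied by each rank into one parallel matrix, e.g.

     Mat seqA, mpiA;
     // ... assemble seqA as a MATSEQBAIJ matrix on each rank ...
     PetscCall(MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD, seqA, PETSC_DECIDE, MAT_INITIAL_MATRIX, &mpiA));
*/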