#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* Hand out CSR (ia/ja) arrays for the locally owned rows of a parallel AIJ matrix by
   first merging the diagonal and off-diagonal blocks into one sequential matrix. */
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  /* Stash B on A (the composition takes a reference) so MatRestoreRowIJ_MPIAIJ() can
     find the same sequential matrix later and return its arrays. */
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B)); /* A still holds a reference through the composition above */
  PetscFunctionReturn(0);
}

/* Release the CSR arrays obtained with MatGetRowIJ_MPIAIJ() and drop the stashed
   sequential matrix (composing NULL removes the reference taken above). */
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
.
-mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 43 44 Developer Note: 45 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 46 enough exist. 47 48 Level: beginner 49 50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 51 M*/ 52 53 /*MC 54 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 55 56 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 57 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 58 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 59 for communicators controlling multiple processes. It is recommended that you call both of 60 the above preallocation routines for simplicity. 61 62 Options Database Keys: 63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 64 65 Level: beginner 66 67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 68 M*/ 69 70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 71 { 72 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 73 74 PetscFunctionBegin; 75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 76 A->boundtocpu = flg; 77 #endif 78 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 79 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 80 81 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 82 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 83 * to differ from the parent matrix. 
*/ 84 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 85 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 86 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 91 { 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 93 94 PetscFunctionBegin; 95 if (mat->A) { 96 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 97 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 103 { 104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 105 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 106 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 107 const PetscInt *ia, *ib; 108 const MatScalar *aa, *bb, *aav, *bav; 109 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 110 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 111 112 PetscFunctionBegin; 113 *keptrows = NULL; 114 115 ia = a->i; 116 ib = b->i; 117 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 118 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 119 for (i = 0; i < m; i++) { 120 na = ia[i + 1] - ia[i]; 121 nb = ib[i + 1] - ib[i]; 122 if (!na && !nb) { 123 cnt++; 124 goto ok1; 125 } 126 aa = aav + ia[i]; 127 for (j = 0; j < na; j++) { 128 if (aa[j] != 0.0) goto ok1; 129 } 130 bb = bav + ib[i]; 131 for (j = 0; j < nb; j++) { 132 if (bb[j] != 0.0) goto ok1; 133 } 134 cnt++; 135 ok1:; 136 } 137 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 138 if (!n0rows) { 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 140 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 141 PetscFunctionReturn(0); 142 } 143 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 144 cnt = 0; 145 for (i = 0; i < m; i++) { 146 na = ia[i + 1] - ia[i]; 147 nb = ib[i + 1] - ib[i]; 148 if (!na && !nb) continue; 149 aa = aav + ia[i]; 150 for (j = 0; j < na; j++) { 151 if (aa[j] != 0.0) { 152 rows[cnt++] = rstart + i; 153 goto ok2; 154 } 155 } 156 bb = bav + ib[i]; 157 for 
(j = 0; j < nb; j++) { 158 if (bb[j] != 0.0) { 159 rows[cnt++] = rstart + i; 160 goto ok2; 161 } 162 } 163 ok2:; 164 } 165 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 167 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 174 PetscBool cong; 175 176 PetscFunctionBegin; 177 PetscCall(MatHasCongruentLayouts(Y, &cong)); 178 if (Y->assembled && cong) { 179 PetscCall(MatDiagonalSet(aij->A, D, is)); 180 } else { 181 PetscCall(MatDiagonalSet_Default(Y, D, is)); 182 } 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 189 PetscInt i, rstart, nrows, *rows; 190 191 PetscFunctionBegin; 192 *zrows = NULL; 193 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 194 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 195 for (i = 0; i < nrows; i++) rows[i] += rstart; 196 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 201 { 202 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 203 PetscInt i, m, n, *garray = aij->garray; 204 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 205 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 206 PetscReal *work; 207 const PetscScalar *dummy; 208 209 PetscFunctionBegin; 210 PetscCall(MatGetSize(A, &m, &n)); 211 PetscCall(PetscCalloc1(n, &work)); 212 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 213 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 214 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 215 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 216 if (type == NORM_2) { 
217 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 218 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 219 } else if (type == NORM_1) { 220 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 221 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 222 } else if (type == NORM_INFINITY) { 223 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 225 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 226 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 227 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 229 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 230 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 231 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 232 if (type == NORM_INFINITY) { 233 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 234 } else { 235 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 236 } 237 PetscCall(PetscFree(work)); 238 if (type == NORM_2) { 239 for (i = 0; i < n; i++) reductions[i] = 
PetscSqrtReal(reductions[i]); 240 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 241 for (i = 0; i < n; i++) reductions[i] /= m; 242 } 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 247 { 248 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 249 IS sis, gis; 250 const PetscInt *isis, *igis; 251 PetscInt n, *iis, nsis, ngis, rstart, i; 252 253 PetscFunctionBegin; 254 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 255 PetscCall(MatFindNonzeroRows(a->B, &gis)); 256 PetscCall(ISGetSize(gis, &ngis)); 257 PetscCall(ISGetSize(sis, &nsis)); 258 PetscCall(ISGetIndices(sis, &isis)); 259 PetscCall(ISGetIndices(gis, &igis)); 260 261 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 262 PetscCall(PetscArraycpy(iis, igis, ngis)); 263 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 264 n = ngis + nsis; 265 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 266 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 267 for (i = 0; i < n; i++) iis[i] += rstart; 268 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 269 270 PetscCall(ISRestoreIndices(sis, &isis)); 271 PetscCall(ISRestoreIndices(gis, &igis)); 272 PetscCall(ISDestroy(&sis)); 273 PetscCall(ISDestroy(&gis)); 274 PetscFunctionReturn(0); 275 } 276 277 /* 278 Local utility routine that creates a mapping from the global column 279 number to the local number in the off-diagonal part of the local 280 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 281 a slightly higher hash table cost; without it it is not scalable (each processor 282 has an order N integer array but is fast to access. 
 */
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt n = aij->B->cmap->n, i; /* number of off-diagonal (ghost) columns */

  PetscFunctionBegin;
  /* garray[] maps local off-diagonal column -> global column; it must exist once B has columns */
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash map from global column id (+1) to local id (+1) */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* non-scalable variant: dense array over all global columns; entries store local id + 1
     so that 0 (from PetscCalloc1) marks "column not present in the off-diagonal block" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}

/* Insert/add a single value into the diagonal block A at (row,col) (local indices).
   Relies on many locals of the caller (rp1, ap1, low1, high1, nrow1, ...); orow/ocol
   are the global indices used only for error messages. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    /* reuse the previous search window when columns arrive in increasing order */ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    /* binary narrowing of [low1,high1) until the window is small enough to scan */ \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    /* skip insertion of an explicit zero off the diagonal when requested */ \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    /* nonew == 1: silently ignore new nonzero locations */ \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 340 rp1[_i] = col; \ 341 ap1[_i] = value; \ 342 A->nonzerostate++; \ 343 a_noinsert:; \ 344 ailen[row] = nrow1; \ 345 } 346 347 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 348 { \ 349 if (col <= lastcol2) low2 = 0; \ 350 else high2 = nrow2; \ 351 lastcol2 = col; \ 352 while (high2 - low2 > 5) { \ 353 t = (low2 + high2) / 2; \ 354 if (rp2[t] > col) high2 = t; \ 355 else low2 = t; \ 356 } \ 357 for (_i = low2; _i < high2; _i++) { \ 358 if (rp2[_i] > col) break; \ 359 if (rp2[_i] == col) { \ 360 if (addv == ADD_VALUES) { \ 361 ap2[_i] += value; \ 362 (void)PetscLogFlops(1.0); \ 363 } else ap2[_i] = value; \ 364 goto b_noinsert; \ 365 } \ 366 } \ 367 if (value == 0.0 && ignorezeroentries) { \ 368 low2 = 0; \ 369 high2 = nrow2; \ 370 goto b_noinsert; \ 371 } \ 372 if (nonew == 1) { \ 373 low2 = 0; \ 374 high2 = nrow2; \ 375 goto b_noinsert; \ 376 } \ 377 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 378 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 379 N = nrow2++ - 1; \ 380 b->nz++; \ 381 high2++; \ 382 /* shift up all the later entries in this row */ \ 383 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 384 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 385 rp2[_i] = col; \ 386 ap2[_i] = value; \ 387 B->nonzerostate++; \ 388 b_noinsert:; \ 389 bilen[row] = nrow2; \ 390 } 391 392 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 393 { 394 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 395 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 396 PetscInt l, *garray = mat->garray, diag; 397 PetscScalar *aa, *ba; 398 399 PetscFunctionBegin; 400 /* code only works for square matrices A */ 401 402 /* 
find size of row to the left of the diagonal part */ 403 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 404 row = row - diag; 405 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 406 if (garray[b->j[b->i[row] + l]] > diag) break; 407 } 408 if (l) { 409 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 410 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 411 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 412 } 413 414 /* diagonal part */ 415 if (a->i[row + 1] - a->i[row]) { 416 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 417 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 418 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 419 } 420 421 /* right of diagonal part */ 422 if (b->i[row + 1] - b->i[row] - l) { 423 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 424 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 425 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 431 { 432 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 433 PetscScalar value = 0.0; 434 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 435 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 436 PetscBool roworiented = aij->roworiented; 437 438 /* Some Variables required in the macro */ 439 Mat A = aij->A; 440 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 441 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 442 PetscBool ignorezeroentries = a->ignorezeroentries; 443 Mat B = aij->B; 444 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 445 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 446 MatScalar *aa, *ba; 447 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 448 PetscInt 
nonew; 449 MatScalar *ap1, *ap2; 450 451 PetscFunctionBegin; 452 PetscCall(MatSeqAIJGetArray(A, &aa)); 453 PetscCall(MatSeqAIJGetArray(B, &ba)); 454 for (i = 0; i < m; i++) { 455 if (im[i] < 0) continue; 456 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 lastcol1 = -1; 460 rp1 = aj + ai[row]; 461 ap1 = aa + ai[row]; 462 rmax1 = aimax[row]; 463 nrow1 = ailen[row]; 464 low1 = 0; 465 high1 = nrow1; 466 lastcol2 = -1; 467 rp2 = bj + bi[row]; 468 ap2 = ba + bi[row]; 469 rmax2 = bimax[row]; 470 nrow2 = bilen[row]; 471 low2 = 0; 472 high2 = nrow2; 473 474 for (j = 0; j < n; j++) { 475 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 476 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 477 if (in[j] >= cstart && in[j] < cend) { 478 col = in[j] - cstart; 479 nonew = a->nonew; 480 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 481 } else if (in[j] < 0) { 482 continue; 483 } else { 484 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 485 if (mat->was_assembled) { 486 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 487 #if defined(PETSC_USE_CTABLE) 488 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 489 col--; 490 #else 491 col = aij->colmap[in[j]] - 1; 492 #endif 493 if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 494 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 495 col = in[j]; 496 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 497 B = aij->B; 498 b = (Mat_SeqAIJ *)B->data; 499 bimax 
= b->imax; 500 bi = b->i; 501 bilen = b->ilen; 502 bj = b->j; 503 ba = b->a; 504 rp2 = bj + bi[row]; 505 ap2 = ba + bi[row]; 506 rmax2 = bimax[row]; 507 nrow2 = bilen[row]; 508 low2 = 0; 509 high2 = nrow2; 510 bm = aij->B->rmap->n; 511 ba = b->a; 512 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 513 if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) { 514 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 515 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 516 } 517 } else col = in[j]; 518 nonew = b->nonew; 519 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 520 } 521 } 522 } else { 523 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 524 if (!aij->donotstash) { 525 mat->assembled = PETSC_FALSE; 526 if (roworiented) { 527 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 528 } else { 529 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 530 } 531 } 532 } 533 } 534 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 535 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 536 PetscFunctionReturn(0); 537 } 538 539 /* 540 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 541 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
542 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 543 */ 544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 545 { 546 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 547 Mat A = aij->A; /* diagonal part of the matrix */ 548 Mat B = aij->B; /* offdiagonal part of the matrix */ 549 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 550 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 551 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 552 PetscInt *ailen = a->ilen, *aj = a->j; 553 PetscInt *bilen = b->ilen, *bj = b->j; 554 PetscInt am = aij->A->rmap->n, j; 555 PetscInt diag_so_far = 0, dnz; 556 PetscInt offd_so_far = 0, onz; 557 558 PetscFunctionBegin; 559 /* Iterate over all rows of the matrix */ 560 for (j = 0; j < am; j++) { 561 dnz = onz = 0; 562 /* Iterate over all non-zero columns of the current row */ 563 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 564 /* If column is in the diagonal */ 565 if (mat_j[col] >= cstart && mat_j[col] < cend) { 566 aj[diag_so_far++] = mat_j[col] - cstart; 567 dnz++; 568 } else { /* off-diagonal entries */ 569 bj[offd_so_far++] = mat_j[col]; 570 onz++; 571 } 572 } 573 ailen[j] = dnz; 574 bilen[j] = onz; 575 } 576 PetscFunctionReturn(0); 577 } 578 579 /* 580 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 581 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 582 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 583 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 584 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
585 */ 586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 587 { 588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 589 Mat A = aij->A; /* diagonal part of the matrix */ 590 Mat B = aij->B; /* offdiagonal part of the matrix */ 591 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 593 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 594 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 595 PetscInt *ailen = a->ilen, *aj = a->j; 596 PetscInt *bilen = b->ilen, *bj = b->j; 597 PetscInt am = aij->A->rmap->n, j; 598 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 599 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 600 PetscScalar *aa = a->a, *ba = b->a; 601 602 PetscFunctionBegin; 603 /* Iterate over all rows of the matrix */ 604 for (j = 0; j < am; j++) { 605 dnz_row = onz_row = 0; 606 rowstart_offd = full_offd_i[j]; 607 rowstart_diag = full_diag_i[j]; 608 /* Iterate over all non-zero columns of the current row */ 609 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 610 /* If column is in the diagonal */ 611 if (mat_j[col] >= cstart && mat_j[col] < cend) { 612 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 613 aa[rowstart_diag + dnz_row] = mat_a[col]; 614 dnz_row++; 615 } else { /* off-diagonal entries */ 616 bj[rowstart_offd + onz_row] = mat_j[col]; 617 ba[rowstart_offd + onz_row] = mat_a[col]; 618 onz_row++; 619 } 620 } 621 ailen[j] = dnz_row; 622 bilen[j] = onz_row; 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 628 { 629 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 630 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 631 PetscInt cstart = 
mat->cmap->rstart, cend = mat->cmap->rend, row, col; 632 633 PetscFunctionBegin; 634 for (i = 0; i < m; i++) { 635 if (idxm[i] < 0) continue; /* negative row */ 636 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j = 0; j < n; j++) { 640 if (idxn[j] < 0) continue; /* negative column */ 641 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 645 } else { 646 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 647 #if defined(PETSC_USE_CTABLE) 648 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 654 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 655 } 656 } 657 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 663 { 664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 665 PetscInt nstash, reallocs; 666 667 PetscFunctionBegin; 668 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 669 670 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 671 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 672 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 673 PetscFunctionReturn(0); 674 } 675 676 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, 
MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  /* Drain the stash: insert all values that other ranks set for our rows */
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break; /* no more messages */

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        /* NOTE(review): both branches evaluate to j - i (j == n in the else case) */
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  /* Assemble the diagonal block first; the off-diagonal block may need disassembly below */
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble.
 */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  /* first final assembly: build the scatter (lvec etc.) used for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* invalidate cached row work arrays and the cached diagonal vector */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    /* sum across ranks so any local structural change bumps the global state */
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zero all stored values (structure is kept) by zeroing both sequential blocks */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/* Zero the given global rows, optionally placing 'diag' on the diagonal and fixing
   the right-hand side b from the solution x for the eliminated rows. */
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x,
Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB; /* nonzero states before zeroing, to detect structural change */
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i for every zeroed local row i */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* square/congruent case: the diagonal entry lives in the diagonal block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;   /* saved 'nonew' flags, restored after insertion */
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for
  (r = 0; r < len; ++r) {
    /* reinstate the requested diagonal entry, in global numbering; rows beyond the
       column range of a rectangular matrix have no diagonal entry to set */
    const PetscInt row = lrows[r] + A->rmap->rstart;
    if (row >= A->cmap->N) continue;
    PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
  }
  /* restore the original "no new nonzeros" flags on both blocks */
  aijA->nonew = nnwA;
  aijB->nonew = nnwB;
  } else {
    /* zero diagonal requested: plain row zeroing of both blocks suffices */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate: bump the parallel state only if some rank changed its local pattern */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

/*
  MatZeroRowsColumns_MPIAIJ - zero the given global rows AND matching columns, optionally
  placing diag on the zeroed diagonal entries and adjusting the right-hand side b for a
  known solution x.

  The (possibly off-process) requested rows are routed to their owners with a PetscSF;
  each owner zeroes its diagonal block via MatZeroRowsColumns() on l->A, then removes the
  zeroed rows/columns from the off-diagonal block l->B by hand, using a scattered 0/1 mask
  to identify which ghost columns were zeroed.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n; /* NOTE(review): also reused below as a per-row nonzero count */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed; MPI_LOR leaves any touched root row >= 0 */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix: build a 0/1 mask of zeroed rows and scatter it to the ghosts */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns.
     NOTE(review): xx/bb below are only initialized when BOTH x and b were given;
     callers appear expected to pass both or neither — confirm */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this ghost column was zeroed somewhere */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* y = A x: overlap the ghost-value scatter with the diagonal-block multiply */
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx,
a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* off-diagonal contribution uses the freshly scattered ghost values */
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(0);
}

/* Apply only the diagonal (on-process) block of the matrix */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A xx: scatter ghosts, add diagonal-block product, then off-diagonal product */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/* yy = A^T xx: local transpose products, then reverse-scatter the off-diagonal part */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T (within tol): cheap diagonal-block test first,
   then an off-diagonal test via submatrix extraction */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1026 if (!*f) PetscFunctionReturn(0); 1027 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1028 PetscCallMPI(MPI_Comm_size(comm, &size)); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1032 PetscCall(MatGetSize(Amat, &M, &N)); 1033 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1034 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1035 for (i = 0; i < first; i++) notme[i] = i; 1036 for (i = last; i < M; i++) notme[i - last + first] = i; 1037 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1038 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1039 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1040 Aoff = Aoffs[0]; 1041 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1042 Boff = Boffs[0]; 1043 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1044 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1045 PetscCall(MatDestroyMatrices(1, &Boffs)); 1046 PetscCall(ISDestroy(&Me)); 1047 PetscCall(ISDestroy(&Notme)); 1048 PetscCall(PetscFree(notme)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1053 { 1054 PetscFunctionBegin; 1055 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1062 1063 PetscFunctionBegin; 1064 /* do nondiagonal part */ 1065 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1066 /* do local part */ 1067 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1068 /* add partial results together */ 1069 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1070 
PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

/* Scale every entry by aa: scale both the diagonal and off-diagonal blocks */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy the parallel AIJ matrix: free both sequential blocks, the column map,
   scatters, COO data, and detach every composed conversion/product function */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is a hash table when CTABLE is enabled, a plain array otherwise */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  /* NOTE(review): "MatConvert_mpiaij_is_C" is detached again a few lines below — the second
     compose is redundant (harmless) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,
"MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(0);
}

/* Write the parallel matrix to a binary viewer in the PETSc MAT file format:
   header (classid, M, N, global nz), per-row lengths, column indices, then values.
   Each local row is emitted in global column order: off-diagonal entries left of the
   diagonal block, the diagonal block, then the remaining off-diagonal entries. */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray; /* local-to-global map for off-diagonal columns */
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local count; the global sum is reduced into the header below */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices (global numbering, ascending within each row) */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break; /* rest of the off-diagonal entries lie right of the diagonal block */
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb <
B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1246 } 1247 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1248 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1249 PetscCall(PetscFree(colidxs)); 1250 1251 /* fill in and store nonzero values */ 1252 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1253 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1254 PetscCall(PetscMalloc1(nz, &matvals)); 1255 for (cnt = 0, i = 0; i < m; i++) { 1256 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1257 if (garray[B->j[jb]] > cs) break; 1258 matvals[cnt++] = ba[jb]; 1259 } 1260 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1261 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1262 } 1263 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1264 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1265 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1266 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1267 PetscCall(PetscFree(matvals)); 1268 1269 /* write block size option to the viewer's .info file */ 1270 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1271 PetscFunctionReturn(0); 1272 } 1273 1274 #include <petscdraw.h> 1275 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1276 { 1277 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1278 PetscMPIInt rank = aij->rank, size = aij->size; 1279 PetscBool isdraw, iascii, isbinary; 1280 PetscViewer sviewer; 1281 PetscViewerFormat format; 1282 1283 PetscFunctionBegin; 1284 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1285 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1286 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, 
PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary of sizes, nonzeros and I-node usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    /* NOTE(review): unreachable — this is an else-branch of "if (iascii)" above, so iascii
       is always PETSC_FALSE here */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatView for MPIAIJ: forward supported viewer types to the worker above */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(0);
}

/* Block Jacobi/SOR: each iteration scatters the current ghost values, subtracts the
   off-diagonal contribution from the right-hand side (bb1 = bb - B*x), and runs the
   requested local sweep on the diagonal block. True parallel SOR is not supported. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL; /* work vector, only allocated when an iteration needs it */
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* "~flag & SOR_ZERO_INITIAL_GUESS" is true when the zero-initial-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) { /* cache the diagonal on first use */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS |
SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/*
  MatPermute_MPIAIJ - form B = P A Q for permutation ISs rowp/colp.

  The inverse permutations are computed with PetscSF reductions (each rank learns where its
  rows/columns land), diagonal/off-diagonal preallocation counts are gathered per destination
  row, and the permuted matrix is filled with MatSetValues() row by row.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros each SOURCE row will contribute at its destination */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the per-row counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

1586 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1587 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1588 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1589 for (i = 0; i < m; i++) { 1590 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1591 PetscInt j0, rowlen; 1592 rowlen = ai[i + 1] - ai[i]; 1593 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1594 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1595 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1596 } 1597 rowlen = bi[i + 1] - bi[i]; 1598 for (j0 = j = 0; j < rowlen; j0 = j) { 1599 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1600 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1601 } 1602 } 1603 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1604 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1605 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1606 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1607 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1608 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1609 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1610 PetscCall(PetscFree3(work, rdest, cdest)); 1611 PetscCall(PetscFree(gcdest)); 1612 if (parcolp) PetscCall(ISDestroy(&colp)); 1613 *B = Aperm; 1614 PetscFunctionReturn(0); 1615 } 1616 1617 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1618 { 1619 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1620 1621 PetscFunctionBegin; 1622 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1623 if (ghosts) *ghosts = aij->garray; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo 
*info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  /* local statistics of the diagonal block */
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  /* add statistics of the off-diagonal block */
  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Forward MatSetOption() requests to the diagonal/off-diagonal blocks or record
   them in the MPIAIJ context, depending on the option. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* these options are simply forwarded to both sequential blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* Return one locally owned row of the parallel matrix with globally numbered,
   ascending column indices, merging the diagonal (A) and off-diagonal (B) parts. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; MatRestoreRow() clears the flag */
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL to the sub-block getrow for outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed in the values pass above; compute it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;

  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* Release the row obtained with MatGetRow_MPIAIJ(); only clears the active flag,
   since the row data lives in buffers owned by the matrix. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute Frobenius, 1- or infinity-norm of the parallel matrix by combining the
   diagonal (A) and off-diagonal (B) sequential blocks and reducing over the communicator. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential block */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates per-GLOBAL-column absolute sums; reduced below */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* garray maps compressed B columns to global column numbers */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/* Transpose a parallel AIJ matrix. The diagonal block is transposed locally;
   the off-diagonal entries are communicated via MatSetValues() on the result. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as column (rstart+i) of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

/* Scale the matrix as diag(ll) * mat * diag(rr); either vector may be NULL. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
     */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/* Mark the matrix as not factored; only the diagonal block carries factor state. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Compare two parallel AIJ matrices block-by-block; the result is the logical AND
   of the per-process comparisons, reduced over the communicator. */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B, using the fast block-wise path only when the nonzero patterns
   match and both matrices use the same copy implementation. */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default MatSetUp(): preallocate with default (heuristic) nonzero counts. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the sorted column
     lists of row i of X and Y, comparing in the global (ltog-mapped) numbering */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* Y += a*X. Fast paths for matching/subset patterns; otherwise build a freshly
   preallocated matrix for the merged pattern and replace Y's contents with it. */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,
&nnz_o)); 2138 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2139 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2140 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2141 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2142 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2143 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2144 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2145 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2146 PetscCall(MatHeaderMerge(Y, &B)); 2147 PetscCall(PetscFree(nnz_d)); 2148 PetscCall(PetscFree(nnz_o)); 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2154 2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2156 { 2157 PetscFunctionBegin; 2158 if (PetscDefined(USE_COMPLEX)) { 2159 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2160 2161 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2162 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2163 } 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2170 2171 PetscFunctionBegin; 2172 PetscCall(MatRealPart(a->A)); 2173 PetscCall(MatRealPart(a->B)); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2180 2181 PetscFunctionBegin; 2182 PetscCall(MatImaginaryPart(a->A)); 2183 PetscCall(MatImaginaryPart(a->B)); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2188 { 2189 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2190 PetscInt i, *idxb = NULL, m = A->rmap->n; 2191 PetscScalar *va, *vv; 2192 Vec vB, vA; 2193 const PetscScalar *vb; 2194 2195 PetscFunctionBegin; 2196 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2197 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2198 2199 PetscCall(VecGetArrayWrite(vA, &va)); 
2200 if (idx) { 2201 for (i = 0; i < m; i++) { 2202 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2203 } 2204 } 2205 2206 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2207 PetscCall(PetscMalloc1(m, &idxb)); 2208 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2209 2210 PetscCall(VecGetArrayWrite(v, &vv)); 2211 PetscCall(VecGetArrayRead(vB, &vb)); 2212 for (i = 0; i < m; i++) { 2213 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2214 vv[i] = vb[i]; 2215 if (idx) idx[i] = a->garray[idxb[i]]; 2216 } else { 2217 vv[i] = va[i]; 2218 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2219 } 2220 } 2221 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2222 PetscCall(VecRestoreArrayWrite(vA, &va)); 2223 PetscCall(VecRestoreArrayRead(vB, &vb)); 2224 PetscCall(PetscFree(idxb)); 2225 PetscCall(VecDestroy(&vA)); 2226 PetscCall(VecDestroy(&vB)); 2227 PetscFunctionReturn(0); 2228 } 2229 2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231 { 2232 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233 PetscInt m = A->rmap->n, n = A->cmap->n; 2234 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235 PetscInt *cmap = mat->garray; 2236 PetscInt *diagIdx, *offdiagIdx; 2237 Vec diagV, offdiagV; 2238 PetscScalar *a, *diagA, *offdiagA; 2239 const PetscScalar *ba, *bav; 2240 PetscInt r, j, col, ncols, *bi, *bj; 2241 Mat B = mat->B; 2242 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243 2244 PetscFunctionBegin; 2245 /* When a process holds entire A and other processes have no entry */ 2246 if (A->cmap->N == n) { 2247 PetscCall(VecGetArrayWrite(v, &diagA)); 2248 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2250 PetscCall(VecDestroy(&diagV)); 2251 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2252 PetscFunctionReturn(0); 2253 } else if (n == 0) { 2254 if (m) { 2255 PetscCall(VecGetArrayWrite(v, &a)); 2256 
      /* no local columns at all: every row min-abs is the implicit zero */
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now scan the explicit entries of this B row for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal and off-diagonal row minima; on a tie, smaller column wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each locally owned row, compute the minimum entry (by real part; implicit
   zeros in the off-diagonal part count) and optionally its global column index. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      /* no local columns: min over an empty row is +infinity */
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan explicit entries of this B row for a smaller value (real-part order) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal and off-diagonal minima; on a tie the smaller column wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each locally owned row, compute the maximum entry (by real part; implicit
   zeros in the off-diagonal part count) and optionally its global column index. */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      /* no local columns: max over an empty row is -infinity */
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2499 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2500 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2501 2502 /* Get offdiagIdx[] for implicit 0.0 */ 2503 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2504 ba = bav; 2505 bi = b->i; 2506 bj = b->j; 2507 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2508 for (r = 0; r < m; r++) { 2509 ncols = bi[r + 1] - bi[r]; 2510 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2511 offdiagA[r] = *ba; 2512 offdiagIdx[r] = cmap[0]; 2513 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2514 offdiagA[r] = 0.0; 2515 2516 /* Find first hole in the cmap */ 2517 for (j = 0; j < ncols; j++) { 2518 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2519 if (col > j && j < cstart) { 2520 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2521 break; 2522 } else if (col > j + n && j >= cstart) { 2523 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2524 break; 2525 } 2526 } 2527 if (j == ncols && ncols < A->cmap->N - n) { 2528 /* a hole is outside compressed Bcols */ 2529 if (ncols == 0) { 2530 if (cstart) { 2531 offdiagIdx[r] = 0; 2532 } else offdiagIdx[r] = cend; 2533 } else { /* ncols > 0 */ 2534 offdiagIdx[r] = cmap[ncols - 1] + 1; 2535 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2536 } 2537 } 2538 } 2539 2540 for (j = 0; j < ncols; j++) { 2541 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2542 offdiagA[r] = *ba; 2543 offdiagIdx[r] = cmap[*bj]; 2544 } 2545 ba++; 2546 bj++; 2547 } 2548 } 2549 2550 PetscCall(VecGetArrayWrite(v, &a)); 2551 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2552 for (r = 0; r < m; ++r) { 2553 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2554 a[r] = diagA[r]; 2555 if (idx) idx[r] = cstart + diagIdx[r]; 2556 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2557 a[r] = diagA[r]; 2558 if (idx) { 2559 
if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2560 idx[r] = cstart + diagIdx[r]; 2561 } else idx[r] = offdiagIdx[r]; 2562 } 2563 } else { 2564 a[r] = offdiagA[r]; 2565 if (idx) idx[r] = offdiagIdx[r]; 2566 } 2567 } 2568 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2569 PetscCall(VecRestoreArrayWrite(v, &a)); 2570 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2571 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2572 PetscCall(VecDestroy(&diagV)); 2573 PetscCall(VecDestroy(&offdiagV)); 2574 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2575 PetscFunctionReturn(0); 2576 } 2577 2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2579 { 2580 Mat *dummy; 2581 2582 PetscFunctionBegin; 2583 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2584 *newmat = *dummy; 2585 PetscCall(PetscFree(dummy)); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2590 { 2591 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2592 2593 PetscFunctionBegin; 2594 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2595 A->factorerrortype = a->A->factorerrortype; 2596 PetscFunctionReturn(0); 2597 } 2598 2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2600 { 2601 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2602 2603 PetscFunctionBegin; 2604 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2605 PetscCall(MatSetRandom(aij->A, rctx)); 2606 if (x->assembled) { 2607 PetscCall(MatSetRandom(aij->B, rctx)); 2608 } else { 2609 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2610 } 2611 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2612 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2613 PetscFunctionReturn(0); 2614 } 2615 2616 
/* Method implementation backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the
   increaseoverlap function pointer between the scalable and default algorithms. */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* CSR row pointers: i[nrows] is the total nonzero count of each sequential block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: `MATMPIAIJ`, `MatIncreaseOverlap()`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently does nothing if the type does not provide the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

/* Process MPIAIJ-specific options; currently only -mat_increase_overlap_scalable. */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed overlap routine */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y += a*I. Ensures a minimal 1-nonzero-per-row preallocation exists for the diagonal
   block before delegating to MatShift_Basic(). */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the no-new-nonzeros option across the re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Report whether any diagonal entry is structurally missing; only square matrices are
   supported. The local index returned by the diagonal block is shifted to global. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(0);
}

/* Invert the variable-sized block diagonal; purely local, delegates to the diagonal block. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(0);
}

/* Drop stored zeros from both the diagonal and off-diagonal sequential blocks. */
PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros(a->A));
  PetscCall(MatEliminateZeros(a->B));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ; slot numbers in the comments index struct _MatOps. */
static struct _MatOps MatOps_Values =
  {MatSetValues_MPIAIJ,
   MatGetRow_MPIAIJ,
   MatRestoreRow_MPIAIJ,
   MatMult_MPIAIJ,
   /* 4*/ MatMultAdd_MPIAIJ,
   MatMultTranspose_MPIAIJ,
   MatMultTransposeAdd_MPIAIJ,
   NULL,
   NULL,
   NULL,
   /*10*/ NULL,
   NULL,
   NULL,
   MatSOR_MPIAIJ,
   MatTranspose_MPIAIJ,
   /*15*/ MatGetInfo_MPIAIJ,
   MatEqual_MPIAIJ,
   MatGetDiagonal_MPIAIJ,
   MatDiagonalScale_MPIAIJ,
   MatNorm_MPIAIJ,
   /*20*/ MatAssemblyBegin_MPIAIJ,
   MatAssemblyEnd_MPIAIJ,
   MatSetOption_MPIAIJ,
   MatZeroEntries_MPIAIJ,
   /*24*/ MatZeroRows_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*29*/ MatSetUp_MPIAIJ,
   NULL,
   NULL,
   MatGetDiagonalBlock_MPIAIJ,
   NULL,
   /*34*/ MatDuplicate_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*39*/ MatAXPY_MPIAIJ,
   MatCreateSubMatrices_MPIAIJ,
   MatIncreaseOverlap_MPIAIJ,
   MatGetValues_MPIAIJ,
   MatCopy_MPIAIJ,
   /*44*/ MatGetRowMax_MPIAIJ,
   MatScale_MPIAIJ,
   MatShift_MPIAIJ,
   MatDiagonalSet_MPIAIJ,
   MatZeroRowsColumns_MPIAIJ,
   /*49*/ MatSetRandom_MPIAIJ,
   MatGetRowIJ_MPIAIJ,
   MatRestoreRowIJ_MPIAIJ,
   NULL,
   NULL,
   /*54*/ MatFDColoringCreate_MPIXAIJ,
   NULL,
   MatSetUnfactored_MPIAIJ,
   MatPermute_MPIAIJ,
   NULL,
   /*59*/ MatCreateSubMatrix_MPIAIJ,
   MatDestroy_MPIAIJ,
   MatView_MPIAIJ,
   NULL,
   NULL,
   /*64*/ NULL,
   MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
   NULL,
   NULL,
   NULL,
   /*69*/ MatGetRowMaxAbs_MPIAIJ,
   MatGetRowMinAbs_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*75*/ MatFDColoringApply_AIJ,
   MatSetFromOptions_MPIAIJ,
   NULL,
   NULL,
   MatFindZeroDiagonals_MPIAIJ,
   /*80*/ NULL,
   NULL,
   NULL,
   /*83*/ MatLoad_MPIAIJ,
   MatIsSymmetric_MPIAIJ,
   NULL,
   NULL,
   NULL,
   NULL,
   /*89*/ NULL,
   NULL,
   MatMatMultNumeric_MPIAIJ_MPIAIJ,
   NULL,
   NULL,
   /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
   NULL,
   NULL,
   NULL,
   MatBindToCPU_MPIAIJ,
   /*99*/ MatProductSetFromOptions_MPIAIJ,
   NULL,
   NULL,
   MatConjugate_MPIAIJ,
   NULL,
   /*104*/ MatSetValuesRow_MPIAIJ,
   MatRealPart_MPIAIJ,
   MatImaginaryPart_MPIAIJ,
   NULL,
   NULL,
   /*109*/ NULL,
   NULL,
   MatGetRowMin_MPIAIJ,
   NULL,
   MatMissingDiagonal_MPIAIJ,
   /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
   NULL,
   MatGetGhosts_MPIAIJ,
   NULL,
   NULL,
   /*119*/ MatMultDiagonalBlock_MPIAIJ,
   NULL,
   NULL,
   NULL,
   MatGetMultiProcBlock_MPIAIJ,
   /*124*/ MatFindNonzeroRows_MPIAIJ,
   MatGetColumnReductions_MPIAIJ,
   MatInvertBlockDiagonal_MPIAIJ,
   MatInvertVariableBlockDiagonal_MPIAIJ,
   MatCreateSubMatricesMPI_MPIAIJ,
   /*129*/ NULL,
   NULL,
   NULL,
   MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
   NULL,
   /*134*/ NULL,
   NULL,
   NULL,
   NULL,
   NULL,
   /*139*/ MatSetBlockSizes_MPIAIJ,
   NULL,
   NULL,
   MatFDColoringSetUp_MPIXAIJ,
   MatFindOffBlockDiagonalEntries_MPIAIJ,
   MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
   /*145*/ NULL,
   NULL,
   NULL,
   MatCreateGraph_Simple_AIJ,
   NULL,
   /*150*/ NULL,
   MatEliminateZeros_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

/* Stash current values of both sequential blocks so they can be restored later
   with MatRetrieveValues(). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the values previously stashed by MatStoreValues_MPIAIJ(). */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocation for MPIAIJ: signature continues in the next chunk of the file. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt
d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* discard any previous column map / ghost data; they are rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on one rank there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) {
    /* first-time preallocation: the diagonal block must be created too */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset (but keep) the existing preallocation of both blocks so the matrix can be
   refilled from scratch; communication structures are destroyed and rebuilt at assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate an MPIAIJ matrix: replicate layouts, flags, column map and ghost index
   array, then duplicate both sequential blocks (values copied or not per cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-call MatGetRow() scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 keeps the allocation non-empty when len == 0 */
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a viewer; dispatches on viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a PETSc binary viewer: read and validate the header,
   fix up layouts, read row lengths (converted to CSR row offsets), then read column
   indices and values and hand everything to MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3]; /* negative nz marks a special on-disk format that cannot be loaded here */
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum turns lengths into offsets) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the global sum of local row lengths must equal the header's nz */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* local stride matches this rank's column ownership exactly */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree before taking the all-columns fast path (MIN == logical AND) */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    /* general path: gather the whole iscol onto every rank (not scalable) */
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
    mat - matrix
    isrow - parallel row index set; its local indices are a subset of local columns of mat,
            i.e., mat->rstart <= isrow[i] < mat->rend
    iscol - parallel column index set; its local indices are a subset of local columns of mat,
            i.e., mat->cstart <= iscol[i] < mat->cend
  Output Parameters:
    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
    iscol_o - sequential column index set for retrieving mat->B
    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x;
     -1 marks columns NOT selected by iscol */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of the local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d (ownership of idx passes to the IS) */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: localize the parallel row indices */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: entries > -1 in the scattered vector were selected by iscol */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* caller frees (see MatCreateSubMatrix_MPIAIJ_SameRowColDist) */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (stashed there at MAT_INITIAL_MATRIX time) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M (takes ownership of Asub; destroys Bsub) */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* march the old (larger) garray alongside the compressed one, keeping only matches */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level MatCreateSubMatrix() implementation for MPIAIJ. Dispatches to the most
   scalable variant available: SameRowColDist (both index sets match mat's layout),
   SameRowDist (rows match, columns gathered once), or the general nonscalable path
   that gathers iscol globally. Reuse information is stashed on *newmat via
   PetscObjectCompose() so MAT_REUSE_MATRIX can take the same path again. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* which fast path (if any) was used at MAT_INITIAL_MATRIX time is recorded on *newmat */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* all ranks must agree on both properties (logical AND across the communicator) */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(0);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(0);
        }
        /* unsorted iscol_local falls through to the general path below */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(0);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the submatrix for future MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(0);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of B columns

  Output Parameter:
. mat - the matrix, with input A as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Translate B's compacted (local) column indices into global column indices, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand array ownership from B to Bnew before destroying B, so the shared arrays survive */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
/*
  Extract a parallel submatrix when isrow has the same row distribution as mat.

  iscol_local is the sequential concatenation of iscol across processes (must be sorted, may
  contain duplicate indices); it is consulted only for MAT_INITIAL_MATRIX (pass NULL on reuse).
  Steps (numbered in the body): (1)-(2) build iscol_sub (requested columns present on this process)
  and iscmap (their positions in the submatrix), (3) extract a sequential Msub, (4) create/reuse
  the parallel result with exact preallocation, (5) insert Msub's entries with remapped columns.
*/
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects stashed on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* merge-scan: is_idx (sorted) against garray (sorted global off-diagonal columns) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of the local column counts yields this process's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* map Msub's local columns to submatrix columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
   in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

   This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all
processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this process's column ownership range [rstart,rend) */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/*
  MatMPIAIJSetPreallocationCSR_MPIAIJ - type-specific implementation behind MatMPIAIJSetPreallocationCSR().
  Counts diagonal/off-diagonal nonzeros per local row from the CSR arrays, preallocates accordingly,
  inserts the values, assembles, and records per-row counts of entries left of the diagonal block in Aij->ld.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug builds only: validate CSR input (assumes column indices within each row are sorted,
     since only JJ[0] and JJ[nnz-1] are range-checked -- TODO confirm) */
  if (PetscDefined(USE_DEBUG)) {
    for (i =
0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count nonzeros per row that fall inside [cstart,cend) (diagonal block) vs outside (off-diagonal) */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart; /* global row number */
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all entries are local by construction, so skip the off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz; /* note: advances the J parameter itself; J is not used again after this loop */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is 3983 as shown 3984 3985 $ 1 0 0 3986 $ 2 0 3 P0 3987 $ ------- 3988 $ 4 5 6 P1 3989 $ 3990 $ Process0 [P0]: rows_owned=[0,1] 3991 $ i = {0,1,3} [size = nrow+1 = 2+1] 3992 $ j = {0,0,2} [size = 3] 3993 $ v = {1,2,3} [size = 3] 3994 $ 3995 $ Process1 [P1]: rows_owned=[2] 3996 $ i = {0,3} [size = nrow+1 = 1+1] 3997 $ j = {0,1,2} [size = 3] 3998 $ v = {4,5,6} [size = 3] 3999 4000 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4001 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4002 @*/ 4003 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4004 { 4005 PetscFunctionBegin; 4006 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4007 PetscFunctionReturn(0); 4008 } 4009 4010 /*@C 4011 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4012 (the default parallel PETSc format). For good matrix assembly performance 4013 the user should preallocate the matrix storage by setting the parameters 4014 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4015 performance can be increased by more than a factor of 50. 4016 4017 Collective 4018 4019 Input Parameters: 4020 + B - the matrix 4021 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4022 (same value is used for all local rows) 4023 . d_nnz - array containing the number of nonzeros in the various rows of the 4024 DIAGONAL portion of the local submatrix (possibly different for each row) 4025 or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure. 4026 The size of this array is equal to the number of local rows, i.e 'm'. 
  For matrices that will be factored, you must leave room for (and set)
  the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.
  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local DIAGONAL submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
  local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation if the matrix type provides one; a no-op otherwise (PetscTryMethod) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown

  Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* A NULL i is tolerated here; a non-NULL i must be a CSR offset array starting at 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* The CSR arrays are copied (and split into diagonal/off-diagonal parts) by the preallocation routine */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
Only the numerical values are updated; the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J - column indices
- v - matrix values

  Level: intermediate

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;       /* row offsets of the local diagonal block */
  PetscInt       *ld  = Aij->ld;     /* ld[i]: number of off-diagonal entries in row i that lie LEFT of the diagonal block */

  PetscFunctionBegin;
  /* Note: M, N, and J are not used; only the values v are consumed. The sparsity pattern must be
     unchanged from MatCreateMPIAIJWithArrays(), which is why only m, n, and Ii[0] are validated. */
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* each CSR row of v is stored column-sorted, so it splits into three runs:
       [off-diag entries left of the diagonal block | diagonal-block entries | off-diag entries right of it] */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the local blocks, so suppress the off-process communication pass */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@
  MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v - matrix values, stored by row

  Level: intermediate

  Note:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* NOTE: despite the name, Adj holds the OFF-diagonal block's row offsets (Ao->i) */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* ld[i]: number of off-diagonal entries in row i left of the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset into v; the row lengths are reconstructed from the stored sparsity pattern */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    /* split the column-sorted CSR row into [left off-diag | diagonal block | right off-diag] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  /* values were written directly into the local blocks, so suppress the off-process communication pass */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
  performance can be increased by more than a factor of 50.
4348 4349 Collective 4350 4351 Input Parameters: 4352 + comm - MPI communicator 4353 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4354 This value should be the same as the local size used in creating the 4355 y vector for the matrix-vector product y = Ax. 4356 . n - This value should be the same as the local size used in creating the 4357 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4358 calculated if N is given) For square matrices n is almost always m. 4359 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4360 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4362 (same value is used for all local rows) 4363 . d_nnz - array containing the number of nonzeros in the various rows of the 4364 DIAGONAL portion of the local submatrix (possibly different for each row) 4365 or NULL, if d_nz is used to specify the nonzero structure. 4366 The size of this array is equal to the number of local rows, i.e 'm'. 4367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4368 submatrix (same value is used for all local rows). 4369 - o_nnz - array containing the number of nonzeros in the various rows of the 4370 OFF-DIAGONAL portion of the local submatrix (possibly different for 4371 each row) or NULL, if o_nz is used to specify the nonzero 4372 structure. The size of this array is equal to the number 4373 of local rows, i.e 'm'. 4374 4375 Output Parameter: 4376 . A - the matrix 4377 4378 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4379 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4380 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4381 4382 Notes: 4383 If the *_nnz parameter is given then the *_nz parameter is ignored 4384 4385 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4386 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4387 storage requirements for this matrix. 4388 4389 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4390 processor than it must be used on all processors that share the object for 4391 that argument. 4392 4393 The user MUST specify either the local or global matrix dimensions 4394 (possibly both). 4395 4396 The parallel matrix is partitioned across processors such that the 4397 first m0 rows belong to process 0, the next m1 rows belong to 4398 process 1, the next m2 rows belong to process 2 etc.. where 4399 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4400 values corresponding to [m x N] submatrix. 4401 4402 The columns are logically partitioned with the n0 columns belonging 4403 to 0th partition, the next n1 columns belonging to the next 4404 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4405 4406 The DIAGONAL portion of the local submatrix on any given processor 4407 is the submatrix corresponding to the rows and columns m,n 4408 corresponding to the given processor. i.e diagonal matrix on 4409 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4410 etc. The remaining portion of the local submatrix [m x (N-n)] 4411 constitute the OFF-DIAGONAL portion. The example below better 4412 illustrates this concept. 4413 4414 For a square global matrix we define each processor's diagonal portion 4415 to be its local rows and the corresponding columns (a square submatrix); 4416 each processor's off-diagonal portion encompasses the remainder of the 4417 local matrix (a rectangular submatrix). 
4418 4419 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4420 4421 When calling this routine with a single process communicator, a matrix of 4422 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4423 type of communicator, use the construction mechanism 4424 .vb 4425 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4426 .ve 4427 4428 $ MatCreate(...,&A); 4429 $ MatSetType(A,MATMPIAIJ); 4430 $ MatSetSizes(A, m,n,M,N); 4431 $ MatMPIAIJSetPreallocation(A,...); 4432 4433 By default, this format uses inodes (identical nodes) when possible. 4434 We search for consecutive rows with the same nonzero structure, thereby 4435 reusing matrix information to achieve increased efficiency. 4436 4437 Options Database Keys: 4438 + -mat_no_inode - Do not use inodes 4439 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4440 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4441 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4442 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4443 4444 Example usage: 4445 4446 Consider the following 8x8 matrix with 34 non-zero values, that is 4447 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4448 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* On a single-process communicator a MATSEQAIJ matrix is returned (see the manual page note above) */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

/*MC
  MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix

  Synopsis:
  MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameter:
. A - the `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - the diagonal portion of the matrix
. Ao - the off diagonal portion of the matrix
.
colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4551 - ierr - error code 4552 4553 Level: advanced 4554 4555 Note: 4556 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4557 4558 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4559 M*/ 4560 4561 /*MC 4562 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4563 4564 Synopsis: 4565 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4566 4567 Not Collective 4568 4569 Input Parameters: 4570 + A - the `MATMPIAIJ` matrix 4571 . Ad - the diagonal portion of the matrix 4572 . Ao - the off diagonal portion of the matrix 4573 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4574 - ierr - error code 4575 4576 Level: advanced 4577 4578 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4579 M*/ 4580 4581 /*@C 4582 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4583 4584 Not collective 4585 4586 Input Parameter: 4587 . A - The `MATMPIAIJ` matrix 4588 4589 Output Parameters: 4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4593 4594 Note: 4595 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4596 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4597 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4598 local column numbers to global column numbers in the original matrix. 
4599 4600 Level: intermediate 4601 4602 .seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4603 @*/ 4604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4605 { 4606 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4607 PetscBool flg; 4608 4609 PetscFunctionBegin; 4610 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4611 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4612 if (Ad) *Ad = a->A; 4613 if (Ao) *Ao = a->B; 4614 if (colmap) *colmap = a->garray; 4615 PetscFunctionReturn(0); 4616 } 4617 4618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4619 { 4620 PetscInt m, N, i, rstart, nnz, Ii; 4621 PetscInt *indx; 4622 PetscScalar *values; 4623 MatType rootType; 4624 4625 PetscFunctionBegin; 4626 PetscCall(MatGetSize(inmat, &m, &N)); 4627 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4628 PetscInt *dnz, *onz, sum, bs, cbs; 4629 4630 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4631 /* Check sum(n) = N */ 4632 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4633 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4634 4635 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4636 rstart -= m; 4637 4638 MatPreallocateBegin(comm, m, n, dnz, onz); 4639 for (i = 0; i < m; i++) { 4640 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4641 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4642 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4643 } 4644 4645 PetscCall(MatCreate(comm, outmat)); 4646 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4647 PetscCall(MatGetBlockSizes(inmat, &bs, 
&cbs)); 4648 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4649 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4650 PetscCall(MatSetType(*outmat, rootType)); 4651 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4652 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4653 MatPreallocateEnd(dnz, onz); 4654 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4655 } 4656 4657 /* numeric phase */ 4658 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4659 for (i = 0; i < m; i++) { 4660 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4661 Ii = i + rstart; 4662 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4663 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4664 } 4665 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4666 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4667 PetscFunctionReturn(0); 4668 } 4669 4670 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4671 { 4672 PetscMPIInt rank; 4673 PetscInt m, N, i, rstart, nnz; 4674 size_t len; 4675 const PetscInt *indx; 4676 PetscViewer out; 4677 char *name; 4678 Mat B; 4679 const PetscScalar *values; 4680 4681 PetscFunctionBegin; 4682 PetscCall(MatGetLocalSize(A, &m, NULL)); 4683 PetscCall(MatGetSize(A, NULL, &N)); 4684 /* Should this be the type of the diagonal block of A? 
*/ 4685 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4686 PetscCall(MatSetSizes(B, m, N, m, N)); 4687 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4688 PetscCall(MatSetType(B, MATSEQAIJ)); 4689 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4690 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4691 for (i = 0; i < m; i++) { 4692 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4693 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4694 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4695 } 4696 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4697 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4698 4699 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4700 PetscCall(PetscStrlen(outfile, &len)); 4701 PetscCall(PetscMalloc1(len + 6, &name)); 4702 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4703 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4704 PetscCall(PetscFree(name)); 4705 PetscCall(MatView(B, out)); 4706 PetscCall(PetscViewerDestroy(&out)); 4707 PetscCall(MatDestroy(&B)); 4708 PetscFunctionReturn(0); 4709 } 4710 4711 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4712 { 4713 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4714 4715 PetscFunctionBegin; 4716 if (!merge) PetscFunctionReturn(0); 4717 PetscCall(PetscFree(merge->id_r)); 4718 PetscCall(PetscFree(merge->len_s)); 4719 PetscCall(PetscFree(merge->len_r)); 4720 PetscCall(PetscFree(merge->bi)); 4721 PetscCall(PetscFree(merge->bj)); 4722 PetscCall(PetscFree(merge->buf_ri[0])); 4723 PetscCall(PetscFree(merge->buf_ri)); 4724 PetscCall(PetscFree(merge->buf_rj[0])); 4725 PetscCall(PetscFree(merge->buf_rj)); 4726 PetscCall(PetscFree(merge->coi)); 4727 PetscCall(PetscFree(merge->coj)); 4728 PetscCall(PetscFree(merge->owners_co)); 4729 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4730 PetscCall(PetscFree(merge)); 4731 PetscFunctionReturn(0); 4732 } 4733 4734 
#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of merging per-process unassembled SeqAIJ matrices into one MPIAIJ matrix:
   re-sends the values of rows owned by other processes and sums local plus received values into
   mpimat, using the communication/merge structure created by MatCreateMPIAIJSumSeqAIJSymbolic(). */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge structure attached to mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* the values for all rows owned by proc are contiguous in aa starting at ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba;
       both index lists are sorted, so a single merge pass (j over bj_i, nextaj over aj) suffices */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4891 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4892 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4893 PetscCall(PetscMalloc1(size, &len_si)); 4894 PetscCall(PetscMalloc1(size, &merge->len_s)); 4895 4896 m = merge->rowmap->n; 4897 owners = merge->rowmap->range; 4898 4899 /* determine the number of messages to send, their lengths */ 4900 /*---------------------------------------------------------*/ 4901 len_s = merge->len_s; 4902 4903 len = 0; /* length of buf_si[] */ 4904 merge->nsend = 0; 4905 for (proc = 0; proc < size; proc++) { 4906 len_si[proc] = 0; 4907 if (proc == rank) { 4908 len_s[proc] = 0; 4909 } else { 4910 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4911 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4912 } 4913 if (len_s[proc]) { 4914 merge->nsend++; 4915 nrows = 0; 4916 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4917 if (ai[i + 1] > ai[i]) nrows++; 4918 } 4919 len_si[proc] = 2 * (nrows + 1); 4920 len += len_si[proc]; 4921 } 4922 } 4923 4924 /* determine the number and length of messages to receive for ij-structure */ 4925 /*-------------------------------------------------------------------------*/ 4926 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4927 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4928 4929 /* post the Irecv of j-structure */ 4930 /*-------------------------------*/ 4931 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4932 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4933 4934 /* post the Isend of j-structure */ 4935 /*--------------------------------*/ 4936 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4937 4938 for (proc = 0, k = 0; proc < size; proc++) { 4939 if (!len_s[proc]) continue; 4940 i = owners[proc]; 4941 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4942 k++; 4943 } 4944 4945 /* receives and sends of j-structure are complete */ 4946 /*------------------------------------------------*/ 4947 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4948 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4949 4950 /* send and recv i-structure */ 4951 /*---------------------------*/ 4952 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4953 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4954 4955 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4956 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4957 for (proc = 0, k = 0; proc < size; proc++) { 4958 if (!len_s[proc]) continue; 4959 /* form outgoing message for i-structure: 4960 buf_si[0]: nrows to be sent 4961 [1:nrows]: row index (global) 4962 [nrows+1:2*nrows+1]: i-structure index 4963 */ 4964 /*-------------------------------------------*/ 4965 nrows = len_si[proc] / 2 - 1; 4966 buf_si_i = buf_si + nrows + 1; 4967 buf_si[0] = nrows; 4968 buf_si_i[0] = 0; 4969 nrows = 0; 4970 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4971 anzi = ai[i + 1] - ai[i]; 4972 if (anzi) { 4973 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4974 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4975 nrows++; 4976 } 4977 } 4978 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4979 k++; 4980 buf_si += len_si[proc]; 4981 } 4982 4983 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4984 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4985 4986 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4987 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4988 4989 PetscCall(PetscFree(len_si)); 4990 PetscCall(PetscFree(len_ri)); 4991 PetscCall(PetscFree(rj_waits)); 4992 PetscCall(PetscFree2(si_waits, sj_waits)); 4993 PetscCall(PetscFree(ri_waits)); 4994 PetscCall(PetscFree(buf_s)); 4995 PetscCall(PetscFree(status)); 4996 4997 /* compute a local seq matrix in each processor */ 4998 /*----------------------------------------------*/ 4999 /* allocate bi array and free space for accumulating nonzero column info */ 5000 PetscCall(PetscMalloc1(m + 1, &bi)); 5001 bi[0] = 0; 5002 5003 /* create and initialize a linked list */ 5004 nlnk = N + 1; 5005 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5006 5007 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5008 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5009 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5010 5011 current_space = free_space; 5012 5013 /* determine symbolic info for each local row */ 5014 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5015 5016 for (k = 0; k < merge->nrecv; k++) { 5017 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5018 nrows = *buf_ri_k[k]; 5019 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5020 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5021 } 5022 5023 MatPreallocateBegin(comm, m, n, dnz, onz); 5024 len = 0; 5025 for (i = 0; i < m; i++) { 5026 bnzi = 0; 5027 /* add local non-zero cols of this proc's seqmat into lnk */ 5028 arow = owners[rank] + i; 5029 anzi = ai[arow + 1] - ai[arow]; 5030 aj = a->j + ai[arow]; 5031 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5032 bnzi += nlnk; 5033 /* add received col data into lnk */ 5034 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5035 if (i == *nextrow[k]) { /* i-th row */ 5036 anzi = *(nextai[k] + 1) - *nextai[k]; 5037 aj = buf_rj[k] + 
*nextai[k]; 5038 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5039 bnzi += nlnk; 5040 nextrow[k]++; 5041 nextai[k]++; 5042 } 5043 } 5044 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5045 5046 /* if free space is not available, make more free space */ 5047 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5048 /* copy data into free space, then initialize lnk */ 5049 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5050 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5051 5052 current_space->array += bnzi; 5053 current_space->local_used += bnzi; 5054 current_space->local_remaining -= bnzi; 5055 5056 bi[i + 1] = bi[i] + bnzi; 5057 } 5058 5059 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5060 5061 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5062 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5063 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5064 5065 /* create symbolic parallel matrix B_mpi */ 5066 /*---------------------------------------*/ 5067 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5068 PetscCall(MatCreate(comm, &B_mpi)); 5069 if (n == PETSC_DECIDE) { 5070 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5071 } else { 5072 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5073 } 5074 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5075 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5076 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5077 MatPreallocateEnd(dnz, onz); 5078 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5079 5080 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5081 B_mpi->assembled = PETSC_FALSE; 5082 merge->bi = bi; 5083 merge->bj = bj; 5084 merge->buf_ri = buf_ri; 5085 merge->buf_rj = buf_rj; 5086 merge->coi = NULL; 5087 merge->coj = NULL; 5088 merge->owners_co = 
NULL; 5089 5090 PetscCall(PetscCommDestroy(&comm)); 5091 5092 /* attach the supporting struct to B_mpi for reuse */ 5093 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5094 PetscCall(PetscContainerSetPointer(container, merge)); 5095 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5096 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5097 PetscCall(PetscContainerDestroy(&container)); 5098 *mpimat = B_mpi; 5099 5100 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5101 PetscFunctionReturn(0); 5102 } 5103 5104 /*@C 5105 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5106 matrices from each processor 5107 5108 Collective 5109 5110 Input Parameters: 5111 + comm - the communicators the parallel matrix will live on 5112 . seqmat - the input sequential matrices 5113 . m - number of local rows (or `PETSC_DECIDE`) 5114 . n - number of local columns (or `PETSC_DECIDE`) 5115 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5116 5117 Output Parameter: 5118 . mpimat - the parallel matrix generated 5119 5120 Level: advanced 5121 5122 Note: 5123 The dimensions of the sequential matrix in each processor MUST be the same. 5124 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5125 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5126 @*/ 5127 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5128 { 5129 PetscMPIInt size; 5130 5131 PetscFunctionBegin; 5132 PetscCallMPI(MPI_Comm_size(comm, &size)); 5133 if (size == 1) { 5134 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5135 if (scall == MAT_INITIAL_MATRIX) { 5136 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5137 } else { 5138 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5139 } 5140 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5141 PetscFunctionReturn(0); 5142 } 5143 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5144 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5145 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5146 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5147 PetscFunctionReturn(0); 5148 } 5149 5150 /*@ 5151 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5152 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5153 with `MatGetSize()` 5154 5155 Not Collective 5156 5157 Input Parameters: 5158 + A - the matrix 5159 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5160 5161 Output Parameter: 5162 . A_loc - the local sequential matrix generated 5163 5164 Level: developer 5165 5166 Notes: 5167 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5168 5169 Destroy the matrix with `MatDestroy()` 5170 5171 .seealso: `MatMPIAIJGetLocalMat()` 5172 @*/ 5173 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5174 { 5175 PetscBool mpi; 5176 5177 PetscFunctionBegin; 5178 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5179 if (mpi) { 5180 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5181 } else { 5182 *A_loc = A; 5183 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5184 } 5185 PetscFunctionReturn(0); 5186 } 5187 5188 /*@ 5189 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5190 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5191 with `MatGetSize()` 5192 5193 Not Collective 5194 5195 Input Parameters: 5196 + A - the matrix 5197 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5198 5199 Output Parameter: 5200 . A_loc - the local sequential matrix generated 5201 5202 Level: developer 5203 5204 Notes: 5205 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5206 5207 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5208 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5209 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5210 modify the values of the returned A_loc. 

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
/* Merges the diagonal (A) and off-diagonal (B) SeqAIJ parts of an MPIAIJ matrix into a
   single SeqAIJ matrix with the local rows and the GLOBAL column space; off-diagonal
   columns are translated to global indices via garray and interleaved with the diagonal
   block so each row's column indices remain sorted. */
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (covers subclasses) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block already is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba walk through the value arrays in step with aj/bj below */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row pointers of the merged matrix: each row holds its diag + offdiag nonzeros */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns below cstart come first, keeping sorted order) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining global columns at/above cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure is unchanged; only copy values, walking the same merge order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into
a sequential matrix with 5321 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5322 5323 Not Collective 5324 5325 Input Parameters: 5326 + A - the matrix 5327 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5328 5329 Output Parameters: 5330 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5331 - A_loc - the local sequential matrix generated 5332 5333 Level: developer 5334 5335 Note: 5336 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5337 5338 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5339 @*/ 5340 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5341 { 5342 Mat Ao, Ad; 5343 const PetscInt *cmap; 5344 PetscMPIInt size; 5345 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5346 5347 PetscFunctionBegin; 5348 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5349 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5350 if (size == 1) { 5351 if (scall == MAT_INITIAL_MATRIX) { 5352 PetscCall(PetscObjectReference((PetscObject)Ad)); 5353 *A_loc = Ad; 5354 } else if (scall == MAT_REUSE_MATRIX) { 5355 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5356 } 5357 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5358 PetscFunctionReturn(0); 5359 } 5360 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5361 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5362 if (f) { 5363 PetscCall((*f)(A, scall, glob, A_loc)); 5364 } else { 5365 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5366 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5367 Mat_SeqAIJ 
*c; 5368 PetscInt *ai = a->i, *aj = a->j; 5369 PetscInt *bi = b->i, *bj = b->j; 5370 PetscInt *ci, *cj; 5371 const PetscScalar *aa, *ba; 5372 PetscScalar *ca; 5373 PetscInt i, j, am, dn, on; 5374 5375 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5376 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5377 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5378 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5379 if (scall == MAT_INITIAL_MATRIX) { 5380 PetscInt k; 5381 PetscCall(PetscMalloc1(1 + am, &ci)); 5382 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5383 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5384 ci[0] = 0; 5385 for (i = 0, k = 0; i < am; i++) { 5386 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5387 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5388 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5389 /* diagonal portion of A */ 5390 for (j = 0; j < ncols_d; j++, k++) { 5391 cj[k] = *aj++; 5392 ca[k] = *aa++; 5393 } 5394 /* off-diagonal portion of A */ 5395 for (j = 0; j < ncols_o; j++, k++) { 5396 cj[k] = dn + *bj++; 5397 ca[k] = *ba++; 5398 } 5399 } 5400 /* put together the new matrix */ 5401 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5402 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5403 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5404 c = (Mat_SeqAIJ *)(*A_loc)->data; 5405 c->free_a = PETSC_TRUE; 5406 c->free_ij = PETSC_TRUE; 5407 c->nonew = 0; 5408 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5409 } else if (scall == MAT_REUSE_MATRIX) { 5410 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5411 for (i = 0; i < am; i++) { 5412 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5413 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5414 /* diagonal portion of A */ 5415 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5416 /* off-diagonal portion of A */ 5417 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5418 } 5419 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5420 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5421 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5422 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5423 if (glob) { 5424 PetscInt cst, *gidx; 5425 5426 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5427 PetscCall(PetscMalloc1(dn + on, &gidx)); 5428 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5429 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5430 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5431 } 5432 } 5433 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5434 PetscFunctionReturn(0); 5435 } 5436 5437 /*@C 5438 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5439 5440 Not Collective 5441 5442 Input Parameters: 5443 + A - the matrix 5444 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5445 - row, col - index sets of rows and columns to extract (or NULL) 5446 5447 Output Parameter: 5448 . 
  A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
/* Extracts the local rows and only the nonzero columns of an MPIAIJ matrix into one
   SeqAIJ matrix; when col is NULL the condensed column IS is attached to the result so
   callers can recover the global column ids. */
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row IS: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column IS: global ids of all columns with local nonzeros, kept sorted by
       splicing the owned range into the (sorted) garray of off-diagonal columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices for reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: the whole row (all its columns)
 * is extracted once a row index is matched.
 * A row could be local or remote. The routine is designed to be scalable in memory so
 * that nothing is based on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf,
  plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* root-side counts/offsets: interleaved (diag, offdiag) pairs per local row of P */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  /* per requested row: total nonzeros (pnnz) plus running totals for diag/offdiag */
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* build two SF graphs over the nonzeros themselves: one for data coming from P's
     diagonal block, one for the off-diagonal block */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (in place on P's data; undone below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth to store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* restore P's off-diagonal column indices back to local numbering */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (dof consecutive columns share one key) */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step (garray is sorted) */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
PetscFunctionReturn(0); 5756 } 5757 5758 /*@C 5759 MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A 5760 5761 Collective 5762 5763 Input Parameters: 5764 + A - the first matrix in `MATMPIAIJ` format 5765 . B - the second matrix in `MATMPIAIJ` format 5766 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5767 5768 Output Parameters: 5769 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5770 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5771 - B_seq - the sequential matrix generated 5772 5773 Level: developer 5774 5775 @*/ 5776 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5777 { 5778 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5779 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5780 IS isrowb, iscolb; 5781 Mat *bseq = NULL; 5782 5783 PetscFunctionBegin; 5784 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5785 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5786 } 5787 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5788 5789 if (scall == MAT_INITIAL_MATRIX) { 5790 start = A->cmap->rstart; 5791 cmap = a->garray; 5792 nzA = a->A->cmap->n; 5793 nzB = a->B->cmap->n; 5794 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5795 ncols = 0; 5796 for (i = 0; i < nzB; i++) { /* row < local row index */ 5797 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5798 else break; 5799 } 5800 imark = i; 5801 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5802 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5803 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5804 
PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5805 } else { 5806 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5807 isrowb = *rowb; 5808 iscolb = *colb; 5809 PetscCall(PetscMalloc1(1, &bseq)); 5810 bseq[0] = *B_seq; 5811 } 5812 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5813 *B_seq = bseq[0]; 5814 PetscCall(PetscFree(bseq)); 5815 if (!rowb) { 5816 PetscCall(ISDestroy(&isrowb)); 5817 } else { 5818 *rowb = isrowb; 5819 } 5820 if (!colb) { 5821 PetscCall(ISDestroy(&iscolb)); 5822 } else { 5823 *colb = iscolb; 5824 } 5825 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5826 PetscFunctionReturn(0); 5827 } 5828 5829 /* 5830 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5831 of the OFF-DIAGONAL portion of local A 5832 5833 Collective 5834 5835 Input Parameters: 5836 + A,B - the matrices in mpiaij format 5837 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5838 5839 Output Parameter: 5840 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5841 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5842 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5843 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5844 5845 Developer Note: 5846 This directly accesses information inside the VecScatter associated with the matrix-vector product 5847 for this matrix. This is not desirable.. 
5848 5849 Level: developer 5850 5851 */ 5852 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5853 { 5854 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5855 Mat_SeqAIJ *b_oth; 5856 VecScatter ctx; 5857 MPI_Comm comm; 5858 const PetscMPIInt *rprocs, *sprocs; 5859 const PetscInt *srow, *rstarts, *sstarts; 5860 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5861 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5862 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5863 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5864 PetscMPIInt size, tag, rank, nreqs; 5865 5866 PetscFunctionBegin; 5867 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5868 PetscCallMPI(MPI_Comm_size(comm, &size)); 5869 5870 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5871 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5872 } 5873 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5874 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5875 5876 if (size == 1) { 5877 startsj_s = NULL; 5878 bufa_ptr = NULL; 5879 *B_oth = NULL; 5880 PetscFunctionReturn(0); 5881 } 5882 5883 ctx = a->Mvctx; 5884 tag = ((PetscObject)ctx)->tag; 5885 5886 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5887 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5888 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5889 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5890 PetscCall(PetscMalloc1(nreqs, &reqs)); 5891 rwaits = reqs; 5892 swaits = reqs + nrecvs; 5893 5894 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5895 if (scall == MAT_INITIAL_MATRIX) { 5896 /* i-array */ 5897 /*---------*/ 5898 /* post receives */ 5899 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5900 for (i = 0; i < nrecvs; i++) { 5901 rowlen = rvalues + rstarts[i] * rbs; 5902 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5903 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5904 } 5905 5906 /* pack the outgoing message */ 5907 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5908 5909 sstartsj[0] = 0; 5910 rstartsj[0] = 0; 5911 len = 0; /* total length of j or a array to be sent */ 5912 if (nsends) { 5913 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5914 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5915 } 5916 for (i = 0; i < nsends; i++) { 5917 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5918 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5919 for (j = 0; j < nrows; j++) { 5920 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5921 for (l = 0; l < sbs; l++) { 5922 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5923 5924 rowlen[j * sbs + l] = ncols; 5925 5926 len += ncols; 5927 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5928 } 5929 k++; 5930 } 5931 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5932 5933 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5934 } 5935 /* recvs and sends of i-array are completed */ 5936 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5937 
PetscCall(PetscFree(svalues)); 5938 5939 /* allocate buffers for sending j and a arrays */ 5940 PetscCall(PetscMalloc1(len + 1, &bufj)); 5941 PetscCall(PetscMalloc1(len + 1, &bufa)); 5942 5943 /* create i-array of B_oth */ 5944 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5945 5946 b_othi[0] = 0; 5947 len = 0; /* total length of j or a array to be received */ 5948 k = 0; 5949 for (i = 0; i < nrecvs; i++) { 5950 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5951 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5952 for (j = 0; j < nrows; j++) { 5953 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5954 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5955 k++; 5956 } 5957 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5958 } 5959 PetscCall(PetscFree(rvalues)); 5960 5961 /* allocate space for j and a arrays of B_oth */ 5962 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5963 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5964 5965 /* j-array */ 5966 /*---------*/ 5967 /* post receives of j-array */ 5968 for (i = 0; i < nrecvs; i++) { 5969 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5970 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5971 } 5972 5973 /* pack the outgoing message j-array */ 5974 if (nsends) k = sstarts[0]; 5975 for (i = 0; i < nsends; i++) { 5976 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5977 bufJ = bufj + sstartsj[i]; 5978 for (j = 0; j < nrows; j++) { 5979 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5980 for (ll = 0; ll < sbs; ll++) { 5981 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5982 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5983 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5984 } 5985 } 5986 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5987 } 
5988 5989 /* recvs and sends of j-array are completed */ 5990 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5991 } else if (scall == MAT_REUSE_MATRIX) { 5992 sstartsj = *startsj_s; 5993 rstartsj = *startsj_r; 5994 bufa = *bufa_ptr; 5995 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5996 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5997 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5998 5999 /* a-array */ 6000 /*---------*/ 6001 /* post receives of a-array */ 6002 for (i = 0; i < nrecvs; i++) { 6003 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6004 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6005 } 6006 6007 /* pack the outgoing message a-array */ 6008 if (nsends) k = sstarts[0]; 6009 for (i = 0; i < nsends; i++) { 6010 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6011 bufA = bufa + sstartsj[i]; 6012 for (j = 0; j < nrows; j++) { 6013 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6014 for (ll = 0; ll < sbs; ll++) { 6015 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6016 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6017 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6018 } 6019 } 6020 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6021 } 6022 /* recvs and sends of a-array are completed */ 6023 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6024 PetscCall(PetscFree(reqs)); 6025 6026 if (scall == MAT_INITIAL_MATRIX) { 6027 /* put together the new matrix */ 6028 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6029 6030 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6031 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6032 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6033 b_oth->free_a = PETSC_TRUE; 6034 b_oth->free_ij = PETSC_TRUE; 6035 b_oth->nonew = 0; 6036 6037 PetscCall(PetscFree(bufj)); 6038 if (!startsj_s || !bufa_ptr) { 6039 PetscCall(PetscFree2(sstartsj, rstartsj)); 6040 PetscCall(PetscFree(bufa_ptr)); 6041 } else { 6042 *startsj_s = sstartsj; 6043 *startsj_r = rstartsj; 6044 *bufa_ptr = bufa; 6045 } 6046 } else if (scall == MAT_REUSE_MATRIX) { 6047 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6048 } 6049 6050 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6051 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6052 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6053 PetscFunctionReturn(0); 6054 } 6055 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6059 #if defined(PETSC_HAVE_MKL_SPARSE) 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6061 #endif 6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6064 #if defined(PETSC_HAVE_ELEMENTAL) 6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6066 #endif 6067 #if defined(PETSC_HAVE_SCALAPACK) 6068 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6069 #endif 6070 #if defined(PETSC_HAVE_HYPRE) 6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6072 #endif 6073 #if defined(PETSC_HAVE_CUDA) 6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  [       ]             [       ]              [       ]
m [   A   ]      *    n [   B   ]     =      m [   C   ]
  [       ]             [       ]              [       ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form C = (B' * A')' using temporary transposes; Ct is transposed back into the
     pre-allocated C (symbolic phase already set C's sizes/type) */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  PetscCall(MatTransposeSetPrecursor(Ct, C)); /* lets the MAT_REUSE_MATRIX transpose below target C */
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

/* Symbolic phase for C = A * B with A MPIDENSE and B MPIAIJ: sizes and types C and wires up
   the numeric phase. The 'fill' parameter is unused here. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); /* default C to A's (dense) type */
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
/* Hook for MATPRODUCT_AB with MPIDense * MPIAIJ: validates layouts and installs the symbolic op */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

/* Dispatch for MPIDense * MPIAIJ products; product types other than AB are silently left unhandled */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way sorted merge; b1/b2 advance by the repeat count of each unique entry */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt   cstart, cend, rstart, rend, row, col;
  PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  } /* Skip negative rows */

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      /* NOTE(review): the bound below uses '<= mat->cmap->N'; a column index equal to N is out of
         range for a 0-based index — confirm whether '<' was intended upstream */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT;
        p++;
      } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* reuse the counters as running offsets */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p; /* PetscCount is signed, so the descending loops below terminate correctly */

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz;                /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* positions before the first mapped nonzero get jmap[0] */
  PetscFunctionReturn(0);
}

PetscErrorCode
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6408 { 6409 MPI_Comm comm; 6410 PetscMPIInt rank, size; 6411 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6412 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6413 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6414 6415 PetscFunctionBegin; 6416 PetscCall(PetscFree(mpiaij->garray)); 6417 PetscCall(VecDestroy(&mpiaij->lvec)); 6418 #if defined(PETSC_USE_CTABLE) 6419 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6420 #else 6421 PetscCall(PetscFree(mpiaij->colmap)); 6422 #endif 6423 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6424 mat->assembled = PETSC_FALSE; 6425 mat->was_assembled = PETSC_FALSE; 6426 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6427 6428 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6429 PetscCallMPI(MPI_Comm_size(comm, &size)); 6430 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6431 PetscCall(PetscLayoutSetUp(mat->rmap)); 6432 PetscCall(PetscLayoutSetUp(mat->cmap)); 6433 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6434 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6435 PetscCall(MatGetLocalSize(mat, &m, &n)); 6436 PetscCall(MatGetSize(mat, &M, &N)); 6437 6438 /* ---------------------------------------------------------------------------*/ 6439 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6440 /* entries come first, then local rows, then remote rows. 
*/ 6441 /* ---------------------------------------------------------------------------*/ 6442 PetscCount n1 = coo_n, *perm1; 6443 PetscInt *i1 = coo_i, *j1 = coo_j; 6444 6445 PetscCall(PetscMalloc1(n1, &perm1)); 6446 for (k = 0; k < n1; k++) perm1[k] = k; 6447 6448 /* Manipulate indices so that entries with negative row or col indices will have smallest 6449 row indices, local entries will have greater but negative row indices, and remote entries 6450 will have positive row indices. 6451 */ 6452 for (k = 0; k < n1; k++) { 6453 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6454 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6455 else { 6456 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6457 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6458 } 6459 } 6460 6461 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6462 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6463 for (k = 0; k < n1; k++) { 6464 if (i1[k] > PETSC_MIN_INT) break; 6465 } /* Advance k to the first entry we need to take care of */ 6466 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6467 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6468 6469 /* ---------------------------------------------------------------------------*/ 6470 /* Split local rows into diag/offdiag portions */ 6471 /* ---------------------------------------------------------------------------*/ 6472 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6473 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6474 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6475 
6476 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6477 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6478 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6479 6480 /* ---------------------------------------------------------------------------*/ 6481 /* Send remote rows to their owner */ 6482 /* ---------------------------------------------------------------------------*/ 6483 /* Find which rows should be sent to which remote ranks*/ 6484 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6485 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6486 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6487 const PetscInt *ranges; 6488 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6489 6490 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6491 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6492 for (k = rem; k < n1;) { 6493 PetscMPIInt owner; 6494 PetscInt firstRow, lastRow; 6495 6496 /* Locate a row range */ 6497 firstRow = i1[k]; /* first row of this owner */ 6498 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6499 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6500 6501 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6502 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6503 6504 /* All entries in [k,p) belong to this remote owner */ 6505 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6506 PetscMPIInt *sendto2; 6507 PetscInt *nentries2; 6508 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6509 6510 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6511 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6512 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6513 PetscCall(PetscFree2(sendto, nentries2)); 6514 sendto = sendto2; 6515 nentries = nentries2; 6516 maxNsend = maxNsend2; 6517 } 6518 sendto[nsend] = owner; 6519 nentries[nsend] = p - k; 6520 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6521 nsend++; 6522 k = p; 6523 } 6524 6525 /* Build 1st SF to know offsets on remote to send data */ 6526 PetscSF sf1; 6527 PetscInt nroots = 1, nroots2 = 0; 6528 PetscInt nleaves = nsend, nleaves2 = 0; 6529 PetscInt *offsets; 6530 PetscSFNode *iremote; 6531 6532 PetscCall(PetscSFCreate(comm, &sf1)); 6533 PetscCall(PetscMalloc1(nsend, &iremote)); 6534 PetscCall(PetscMalloc1(nsend, &offsets)); 6535 for (k = 0; k < nsend; k++) { 6536 iremote[k].rank = sendto[k]; 6537 iremote[k].index = 0; 6538 nleaves2 += nentries[k]; 6539 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6540 } 6541 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6542 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6543 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6544 PetscCall(PetscSFDestroy(&sf1)); 6545 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6546 6547 /* Build 2nd SF to send remote COOs to their owner */ 6548 PetscSF sf2; 6549 nroots = nroots2; 6550 nleaves = nleaves2; 6551 PetscCall(PetscSFCreate(comm, &sf2)); 6552 
PetscCall(PetscSFSetFromOptions(sf2)); 6553 PetscCall(PetscMalloc1(nleaves, &iremote)); 6554 p = 0; 6555 for (k = 0; k < nsend; k++) { 6556 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6557 for (q = 0; q < nentries[k]; q++, p++) { 6558 iremote[p].rank = sendto[k]; 6559 iremote[p].index = offsets[k] + q; 6560 } 6561 } 6562 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6563 6564 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6565 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6566 6567 /* Send the remote COOs to their owner */ 6568 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6569 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6570 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6571 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6572 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6573 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6574 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6575 6576 PetscCall(PetscFree(offsets)); 6577 PetscCall(PetscFree2(sendto, nentries)); 6578 6579 /* ---------------------------------------------------------------*/ 6580 /* Sort received COOs by row along with the permutation array */ 6581 /* ---------------------------------------------------------------*/ 6582 for (k = 0; k < n2; k++) perm2[k] = k; 6583 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6584 6585 /* ---------------------------------------------------------------*/ 6586 /* 
Split received COOs into diag/offdiag portions */ 6587 /* ---------------------------------------------------------------*/ 6588 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6589 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6590 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6591 6592 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6593 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6594 6595 /* --------------------------------------------------------------------------*/ 6596 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6597 /* --------------------------------------------------------------------------*/ 6598 PetscInt *Ai, *Bi; 6599 PetscInt *Aj, *Bj; 6600 6601 PetscCall(PetscMalloc1(m + 1, &Ai)); 6602 PetscCall(PetscMalloc1(m + 1, &Bi)); 6603 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6604 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6605 6606 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6607 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6608 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6609 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6610 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6611 6612 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6613 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6614 6615 /* --------------------------------------------------------------------------*/ 6616 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6617 /* expect nonzeros in A/B most likely have local contributing entries */ 6618 /* --------------------------------------------------------------------------*/ 6619 PetscInt Annz = Ai[m]; 6620 PetscInt Bnnz = Bi[m]; 6621 
PetscCount *Ajmap1_new, *Bjmap1_new; 6622 6623 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6624 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6625 6626 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6627 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6628 6629 PetscCall(PetscFree(Aimap1)); 6630 PetscCall(PetscFree(Ajmap1)); 6631 PetscCall(PetscFree(Bimap1)); 6632 PetscCall(PetscFree(Bjmap1)); 6633 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6634 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6635 PetscCall(PetscFree(perm1)); 6636 PetscCall(PetscFree3(i2, j2, perm2)); 6637 6638 Ajmap1 = Ajmap1_new; 6639 Bjmap1 = Bjmap1_new; 6640 6641 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6642 if (Annz < Annz1 + Annz2) { 6643 PetscInt *Aj_new; 6644 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6645 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6646 PetscCall(PetscFree(Aj)); 6647 Aj = Aj_new; 6648 } 6649 6650 if (Bnnz < Bnnz1 + Bnnz2) { 6651 PetscInt *Bj_new; 6652 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6653 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6654 PetscCall(PetscFree(Bj)); 6655 Bj = Bj_new; 6656 } 6657 6658 /* --------------------------------------------------------------------------------*/ 6659 /* Create new submatrices for on-process and off-process coupling */ 6660 /* --------------------------------------------------------------------------------*/ 6661 PetscScalar *Aa, *Ba; 6662 MatType rtype; 6663 Mat_SeqAIJ *a, *b; 6664 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6665 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6666 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6667 if (cstart) { 6668 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6669 } 6670 PetscCall(MatDestroy(&mpiaij->A)); 6671 PetscCall(MatDestroy(&mpiaij->B)); 6672 PetscCall(MatGetRootType_Private(mat, &rtype)); 6673 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6674 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6675 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6676 6677 a = (Mat_SeqAIJ *)mpiaij->A->data; 6678 b = (Mat_SeqAIJ *)mpiaij->B->data; 6679 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6680 a->free_a = b->free_a = PETSC_TRUE; 6681 a->free_ij = b->free_ij = PETSC_TRUE; 6682 6683 /* conversion must happen AFTER multiply setup */ 6684 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6685 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6686 PetscCall(VecDestroy(&mpiaij->lvec)); 6687 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6688 6689 mpiaij->coo_n = coo_n; 6690 mpiaij->coo_sf = sf2; 6691 mpiaij->sendlen = nleaves; 6692 mpiaij->recvlen = nroots; 6693 6694 mpiaij->Annz = Annz; 6695 mpiaij->Bnnz = Bnnz; 6696 6697 mpiaij->Annz2 = Annz2; 6698 mpiaij->Bnnz2 = Bnnz2; 6699 6700 mpiaij->Atot1 = Atot1; 6701 mpiaij->Atot2 = Atot2; 6702 mpiaij->Btot1 = Btot1; 6703 mpiaij->Btot2 = Btot2; 6704 6705 mpiaij->Ajmap1 = Ajmap1; 6706 mpiaij->Aperm1 = Aperm1; 6707 6708 mpiaij->Bjmap1 = Bjmap1; 6709 mpiaij->Bperm1 = Bperm1; 6710 6711 mpiaij->Aimap2 = Aimap2; 6712 mpiaij->Ajmap2 = Ajmap2; 6713 mpiaij->Aperm2 = Aperm2; 6714 6715 mpiaij->Bimap2 = Bimap2; 6716 mpiaij->Bjmap2 = Bjmap2; 6717 mpiaij->Bperm2 = Bperm2; 6718 6719 mpiaij->Cperm1 = Cperm1; 6720 6721 /* Allocate in preallocation. 
     If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* Insert the COO values v[] (ordered as the indices given to MatSetPreallocationCOO()) into mat.
   Local entries are summed directly into the diagonal (A) and off-diagonal (B) blocks through the
   jmap/perm arrays built at preallocation time; entries owned by other ranks are packed via Cperm1,
   shipped with the SF built at preallocation time, and summed on arrival.
   imode: INSERT_VALUES overwrites existing values, otherwise values are added. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    /* INSERT_VALUES discards the current entry; otherwise accumulate onto it */
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2/Bimap2 map the i-th received nonzero
     to its position in Aa/Ba (remote entries always add, even under INSERT_VALUES, since
     the owning nonzero was already reset above) */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
    `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
    In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data structure, installs the shared
   function table, creates the stash used to buffer off-process MatSetValues(), and composes
   by name the conversion, product, and COO methods that other parts of PETSc query for. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  /* Install the common MPIAIJ operations table */
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Compose named methods queried via PetscObjectQueryFunction() elsewhere */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to sibling AIJ formats; device/backend conversions are compiled in conditionally */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
       communication if it is known that only local entries will be set.
6919 6920 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6921 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6922 @*/ 6923 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6924 { 6925 Mat_MPIAIJ *maij; 6926 6927 PetscFunctionBegin; 6928 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6929 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6930 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6931 PetscCall(MatCreate(comm, mat)); 6932 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6933 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6934 maij = (Mat_MPIAIJ *)(*mat)->data; 6935 6936 (*mat)->preallocated = PETSC_TRUE; 6937 6938 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6939 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6940 6941 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6942 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6943 6944 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6945 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6946 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6947 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6948 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6949 PetscFunctionReturn(0); 6950 } 6951 6952 typedef struct { 6953 Mat *mp; /* intermediate products */ 6954 PetscBool *mptmp; /* is the intermediate product temporary ? 
                     */
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND data stored in C->product->data; releases every
   intermediate matrix, the SF-allocated COO buffers, and the index arrays. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memory type mtype, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] appear to hold the pooled index storage shared by all mp[i] — freed once here
     (NOTE(review): confirm against the allocation site, which is outside this chunk) */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-side) implementation if one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* gather: v[j] = vv[idx[j]] */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend MatMat product: refresh the temporary matrices if needed,
   run the numeric phase of every intermediate product, then scatter their values into C
   through MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* symbolic-phase data is only reusable for the first numeric call */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s",
MatProductTypes[mmdata->mp[i]->product->type]); 7052 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7053 } 7054 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7055 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7056 7057 if (mmdata->mptmp[i]) continue; 7058 if (noff) { 7059 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7060 7061 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7062 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7063 n_o += noff; 7064 n_d += nown; 7065 } else { 7066 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7067 7068 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7069 n_d += mm->nz; 7070 } 7071 } 7072 if (mmdata->hasoffproc) { /* offprocess insertion */ 7073 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7074 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7075 } 7076 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7077 PetscFunctionReturn(0); 7078 } 7079 7080 /* Support for Pt * A, A * P, or Pt * A * P */ 7081 #define MAX_NUMBER_INTERMEDIATE 4 7082 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7083 { 7084 Mat_Product *product = C->product; 7085 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7086 Mat_MPIAIJ *a, *p; 7087 MatMatMPIAIJBACKEND *mmdata; 7088 ISLocalToGlobalMapping P_oth_l2g = NULL; 7089 IS glob = NULL; 7090 const char *prefix; 7091 char pprefix[256]; 7092 const PetscInt *globidx, *P_oth_idx; 7093 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7094 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7095 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7096 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7097 /* a base offset; type-2: sparse with a local to global map table */ 7098 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7099 7100 MatProductType ptype; 7101 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7102 PetscMPIInt size; 7103 7104 PetscFunctionBegin; 7105 MatCheckProduct(C, 1); 7106 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7107 ptype = product->type; 7108 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7109 ptype = MATPRODUCT_AB; 7110 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7111 } 7112 switch (ptype) { 7113 case MATPRODUCT_AB: 7114 A = product->A; 7115 P = product->B; 7116 m = A->rmap->n; 7117 n = P->cmap->n; 7118 M = A->rmap->N; 7119 N = P->cmap->N; 7120 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7121 break; 7122 case MATPRODUCT_AtB: 7123 P = product->A; 7124 A = product->B; 7125 m = P->cmap->n; 7126 n = A->cmap->n; 7127 M = P->cmap->N; 7128 N = A->cmap->N; 7129 hasoffproc = PETSC_TRUE; 7130 break; 7131 case MATPRODUCT_PtAP: 7132 A = product->A; 7133 P = product->B; 7134 m = P->cmap->n; 7135 n = P->cmap->n; 7136 M = P->cmap->N; 7137 N = P->cmap->N; 7138 hasoffproc = PETSC_TRUE; 7139 break; 7140 default: 7141 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7142 } 7143 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7144 if (size == 1) hasoffproc = PETSC_FALSE; 7145 7146 /* defaults */ 7147 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7148 mp[i] = NULL; 7149 mptmp[i] = PETSC_FALSE; 7150 rmapt[i] = -1; 7151 cmapt[i] = -1; 7152 rmapa[i] = NULL; 7153 cmapa[i] = NULL; 7154 } 7155 7156 /* customization */ 
7157 PetscCall(PetscNew(&mmdata)); 7158 mmdata->reusesym = product->api_user; 7159 if (ptype == MATPRODUCT_AB) { 7160 if (product->api_user) { 7161 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7162 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7163 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7164 PetscOptionsEnd(); 7165 } else { 7166 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7167 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7168 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7169 PetscOptionsEnd(); 7170 } 7171 } else if (ptype == MATPRODUCT_PtAP) { 7172 if (product->api_user) { 7173 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7174 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7175 PetscOptionsEnd(); 7176 } else { 7177 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7178 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7179 PetscOptionsEnd(); 7180 } 7181 } 7182 a = (Mat_MPIAIJ *)A->data; 7183 p = (Mat_MPIAIJ *)P->data; 7184 PetscCall(MatSetSizes(C, m, n, M, N)); 7185 PetscCall(PetscLayoutSetUp(C->rmap)); 7186 PetscCall(PetscLayoutSetUp(C->cmap)); 7187 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7188 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7189 7190 cp = 0; 7191 switch (ptype) { 7192 case MATPRODUCT_AB: /* A * P */ 7193 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7194 7195 /* A_diag * P_local (merged or not) */ 7196 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7197 /* P is product->B */ 7198 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7199 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7200 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7201 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7202 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7203 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7204 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7205 mp[cp]->product->api_user = product->api_user; 7206 PetscCall(MatProductSetFromOptions(mp[cp])); 7207 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7208 PetscCall(ISGetIndices(glob, &globidx)); 7209 rmapt[cp] = 1; 7210 cmapt[cp] = 2; 7211 cmapa[cp] = globidx; 7212 mptmp[cp] = PETSC_FALSE; 7213 cp++; 7214 } else { /* A_diag * P_diag and A_diag * P_off */ 7215 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7216 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7217 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7218 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7219 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7220 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7221 mp[cp]->product->api_user = product->api_user; 7222 PetscCall(MatProductSetFromOptions(mp[cp])); 7223 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7224 rmapt[cp] = 1; 7225 cmapt[cp] = 1; 7226 mptmp[cp] = PETSC_FALSE; 7227 cp++; 7228 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7229 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7230 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7231 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7232 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7233 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7234 mp[cp]->product->api_user = product->api_user; 7235 PetscCall(MatProductSetFromOptions(mp[cp])); 7236 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7237 rmapt[cp] = 1; 7238 cmapt[cp] = 2; 7239 cmapa[cp] = p->garray; 7240 mptmp[cp] = PETSC_FALSE; 7241 cp++; 7242 } 7243 7244 /* A_off * P_other */ 7245 if (mmdata->P_oth) { 7246 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7247 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7248 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7249 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7250 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7251 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7252 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7253 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7254 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7255 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7256 mp[cp]->product->api_user = product->api_user; 7257 PetscCall(MatProductSetFromOptions(mp[cp])); 7258 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7259 rmapt[cp] = 1; 7260 cmapt[cp] = 2; 7261 cmapa[cp] = P_oth_idx; 7262 mptmp[cp] = PETSC_FALSE; 7263 cp++; 7264 } 7265 break; 7266 7267 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7268 /* A is product->B */ 7269 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7270 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7271 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7272 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7273 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7274 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7275 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7276 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7277 mp[cp]->product->api_user = product->api_user; 7278 PetscCall(MatProductSetFromOptions(mp[cp])); 7279 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7280 PetscCall(ISGetIndices(glob, &globidx)); 7281 rmapt[cp] = 2; 7282 rmapa[cp] = globidx; 7283 cmapt[cp] = 2; 7284 cmapa[cp] = globidx; 7285 mptmp[cp] = PETSC_FALSE; 7286 cp++; 7287 } else { 7288 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7289 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7290 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7291 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7292 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7293 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7294 mp[cp]->product->api_user = product->api_user; 7295 PetscCall(MatProductSetFromOptions(mp[cp])); 7296 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7297 PetscCall(ISGetIndices(glob, &globidx)); 7298 rmapt[cp] = 1; 7299 cmapt[cp] = 2; 7300 cmapa[cp] = globidx; 7301 mptmp[cp] = PETSC_FALSE; 7302 cp++; 7303 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 rmapt[cp] = 2; 7313 rmapa[cp] = p->garray; 
7314 cmapt[cp] = 2; 7315 cmapa[cp] = globidx; 7316 mptmp[cp] = PETSC_FALSE; 7317 cp++; 7318 } 7319 break; 7320 case MATPRODUCT_PtAP: 7321 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7322 /* P is product->B */ 7323 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7324 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7325 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7326 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7327 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7328 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7329 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7330 mp[cp]->product->api_user = product->api_user; 7331 PetscCall(MatProductSetFromOptions(mp[cp])); 7332 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7333 PetscCall(ISGetIndices(glob, &globidx)); 7334 rmapt[cp] = 2; 7335 rmapa[cp] = globidx; 7336 cmapt[cp] = 2; 7337 cmapa[cp] = globidx; 7338 mptmp[cp] = PETSC_FALSE; 7339 cp++; 7340 if (mmdata->P_oth) { 7341 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7342 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7343 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7344 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7345 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7346 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7347 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7348 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7349 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7350 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7351 mp[cp]->product->api_user = product->api_user; 7352 PetscCall(MatProductSetFromOptions(mp[cp])); 7353 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7354 
mptmp[cp] = PETSC_TRUE; 7355 cp++; 7356 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7357 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7358 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7359 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7360 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7361 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7362 mp[cp]->product->api_user = product->api_user; 7363 PetscCall(MatProductSetFromOptions(mp[cp])); 7364 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7365 rmapt[cp] = 2; 7366 rmapa[cp] = globidx; 7367 cmapt[cp] = 2; 7368 cmapa[cp] = P_oth_idx; 7369 mptmp[cp] = PETSC_FALSE; 7370 cp++; 7371 } 7372 break; 7373 default: 7374 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7375 } 7376 /* sanity check */ 7377 if (size > 1) 7378 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7379 7380 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7381 for (i = 0; i < cp; i++) { 7382 mmdata->mp[i] = mp[i]; 7383 mmdata->mptmp[i] = mptmp[i]; 7384 } 7385 mmdata->cp = cp; 7386 C->product->data = mmdata; 7387 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7388 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7389 7390 /* memory type */ 7391 mmdata->mtype = PETSC_MEMTYPE_HOST; 7392 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7393 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7394 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7395 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7396 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7397 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7398 
7399 /* prepare coo coordinates for values insertion */ 7400 7401 /* count total nonzeros of those intermediate seqaij Mats 7402 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7403 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7404 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7405 */ 7406 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7407 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7408 if (mptmp[cp]) continue; 7409 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7410 const PetscInt *rmap = rmapa[cp]; 7411 const PetscInt mr = mp[cp]->rmap->n; 7412 const PetscInt rs = C->rmap->rstart; 7413 const PetscInt re = C->rmap->rend; 7414 const PetscInt *ii = mm->i; 7415 for (i = 0; i < mr; i++) { 7416 const PetscInt gr = rmap[i]; 7417 const PetscInt nz = ii[i + 1] - ii[i]; 7418 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7419 else ncoo_oown += nz; /* this row is local */ 7420 } 7421 } else ncoo_d += mm->nz; 7422 } 7423 7424 /* 7425 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7426 7427 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7428 7429 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7430 7431 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7432 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7433 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7434 7435 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7436 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 7437 */ 7438 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7439 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7440 7441 /* gather (i,j) of nonzeros inserted by remote procs */ 7442 if (hasoffproc) { 7443 PetscSF msf; 7444 PetscInt ncoo2, *coo_i2, *coo_j2; 7445 7446 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7447 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7448 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7449 7450 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7451 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7452 PetscInt *idxoff = mmdata->off[cp]; 7453 PetscInt *idxown = mmdata->own[cp]; 7454 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7455 const PetscInt *rmap = rmapa[cp]; 7456 const PetscInt *cmap = cmapa[cp]; 7457 const PetscInt *ii = mm->i; 7458 PetscInt *coi = coo_i + ncoo_o; 7459 PetscInt *coj = coo_j + ncoo_o; 7460 const PetscInt mr = mp[cp]->rmap->n; 7461 const PetscInt rs = C->rmap->rstart; 7462 const PetscInt re = C->rmap->rend; 7463 const PetscInt cs = C->cmap->rstart; 7464 for (i = 0; i < mr; i++) { 7465 const PetscInt *jj = mm->j + ii[i]; 7466 const PetscInt gr = rmap[i]; 7467 const PetscInt nz = ii[i + 1] - ii[i]; 7468 if (gr < rs || gr >= re) { /* this is an offproc row */ 7469 for (j = ii[i]; j < ii[i + 1]; j++) { 7470 *coi++ = gr; 7471 *idxoff++ = j; 7472 } 7473 if (!cmapt[cp]) { /* already global */ 7474 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7475 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7476 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7477 } else { /* offdiag */ 7478 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7479 } 7480 ncoo_o += nz; 7481 } else { /* this is a local row */ 7482 for (j = ii[i]; j < ii[i 
+ 1]; j++) *idxown++ = j; 7483 } 7484 } 7485 } 7486 mmdata->off[cp + 1] = idxoff; 7487 mmdata->own[cp + 1] = idxown; 7488 } 7489 7490 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7491 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7492 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7493 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7494 ncoo = ncoo_d + ncoo_oown + ncoo2; 7495 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7496 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7497 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7498 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7499 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7500 PetscCall(PetscFree2(coo_i, coo_j)); 7501 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7502 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7503 coo_i = coo_i2; 7504 coo_j = coo_j2; 7505 } else { /* no offproc values insertion */ 7506 ncoo = ncoo_d; 7507 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7508 7509 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7510 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7511 PetscCall(PetscSFSetUp(mmdata->sf)); 7512 } 7513 mmdata->hasoffproc = hasoffproc; 7514 7515 /* gather (i,j) of nonzeros inserted locally */ 7516 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7517 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7518 PetscInt *coi = coo_i + ncoo_d; 7519 PetscInt *coj = coo_j + ncoo_d; 7520 const PetscInt *jj = mm->j; 7521 const PetscInt *ii = mm->i; 7522 const PetscInt *cmap = cmapa[cp]; 
7523 const PetscInt *rmap = rmapa[cp]; 7524 const PetscInt mr = mp[cp]->rmap->n; 7525 const PetscInt rs = C->rmap->rstart; 7526 const PetscInt re = C->rmap->rend; 7527 const PetscInt cs = C->cmap->rstart; 7528 7529 if (mptmp[cp]) continue; 7530 if (rmapt[cp] == 1) { /* consecutive rows */ 7531 /* fill coo_i */ 7532 for (i = 0; i < mr; i++) { 7533 const PetscInt gr = i + rs; 7534 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7535 } 7536 /* fill coo_j */ 7537 if (!cmapt[cp]) { /* type-0, already global */ 7538 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7539 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7540 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7541 } else { /* type-2, local to global for sparse columns */ 7542 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7543 } 7544 ncoo_d += mm->nz; 7545 } else if (rmapt[cp] == 2) { /* sparse rows */ 7546 for (i = 0; i < mr; i++) { 7547 const PetscInt *jj = mm->j + ii[i]; 7548 const PetscInt gr = rmap[i]; 7549 const PetscInt nz = ii[i + 1] - ii[i]; 7550 if (gr >= rs && gr < re) { /* local rows */ 7551 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7552 if (!cmapt[cp]) { /* type-0, already global */ 7553 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7554 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7555 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7556 } else { /* type-2, local to global for sparse columns */ 7557 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7558 } 7559 ncoo_d += nz; 7560 } 7561 } 7562 } 7563 } 7564 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7565 PetscCall(ISDestroy(&glob)); 7566 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7567 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7568 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7569 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * 
sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}

/*
  Decide whether the BACKEND product implementation should handle this product.

  For AB, AtB and PtAP products the BACKEND symbolic routine is selected when either
  no device is configured (match defaults to PETSC_TRUE), or a device is configured,
  both operands live on the device (neither is bound to the CPU), A and B have the
  same type, and the user has not requested the CPU path through the
  -mat*_backend_cpu / -mat_product_algorithm_backend_cpu options.
  Otherwise we fall back to the plain MPIAIJ product dispatch.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* only use BACKEND if operand types match (checked below) */
  PetscBool usecpu = PETSC_FALSE; /* user override: force the CPU fallback */
#else
  PetscBool match = PETSC_TRUE; /* no device: BACKEND path is always applicable */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* Only consider the device path when both operands are resident on the device and share a type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* Option names depend on whether the product was created through the user-level
       API (MatMatMult() etc., product->api_user set) or the MatProduct interface */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break; /* other product types never use BACKEND; leave match as-is and fall through */
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt = -1 so an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    /* idx[] is sorted, so equal block indices are consecutive; emit each block id once */
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* Collapse the first scalar row of the block row, then merge in the remaining bs-1 rows */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* rotate buffers so the merged result becomes the "previous" array for the next iteration */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
    /* NOTE(review): PetscMergeIntArray() appears to allocate its output; for bs > 2 the
       pointer rotated into 'merged' is overwritten on the next call — confirm ownership
       of these intermediate arrays (possible leak when collapsed == NULL). */
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev; /* caller takes ownership of the final merged array */
  PetscFunctionReturn(0);
}

/*
  This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat; /* the filtered copy being built; returned in *filteredG */
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c; /* a = diagonal block, b = off-diagonal block (NULL if sequential), c = loop cursor */
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  /* the filtered matrix has the same type as the input graph */
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that
     compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this
     operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray; /* maps local off-diagonal column ids to global ids */
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols; /* track widest row to size the AA/AJ work buffers */
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  /* only one of the two preallocation calls applies, depending on tGmat's type */
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  /* all insertions below are local rows, so no off-process stash is needed */
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ));
  nnz0 = nnz1 = 0; /* nnz0 = entries scanned, nnz1 = entries kept (for the stats message) */
  /* pass 1: diagonal block (c == a); pass 2: off-diagonal block (c == b), skipped when b is NULL */
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        /* NOTE(review): sv holds a real magnitude but is declared PetscScalar; the
           PetscRealPart() in the comparison below compensates — consider PetscReal */
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) { /* keep entries strictly above the threshold */
          nnz1++;
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]]; /* off-diagonal: translate to global column id */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}

/*
  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

  Input Parameter:
. Amat - matrix
- symmetrize - make the result symmetric
+ scale - scale with diagonal

 Output Parameter:
.
a_Gmat - output scalar graph >= 0 7802 7803 */ 7804 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7805 { 7806 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7807 MPI_Comm comm; 7808 Mat Gmat; 7809 PetscBool ismpiaij, isseqaij; 7810 Mat a, b, c; 7811 MatType jtype; 7812 7813 PetscFunctionBegin; 7814 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7815 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7816 PetscCall(MatGetSize(Amat, &MM, &NN)); 7817 PetscCall(MatGetBlockSize(Amat, &bs)); 7818 nloc = (Iend - Istart) / bs; 7819 7820 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7821 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7822 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7823 7824 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7825 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7826 implementation */ 7827 if (bs > 1) { 7828 PetscCall(MatGetType(Amat, &jtype)); 7829 PetscCall(MatCreate(comm, &Gmat)); 7830 PetscCall(MatSetType(Gmat, jtype)); 7831 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7832 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7833 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7834 PetscInt *d_nnz, *o_nnz; 7835 MatScalar *aa, val, AA[4096]; 7836 PetscInt *aj, *ai, AJ[4096], nc; 7837 if (isseqaij) { 7838 a = Amat; 7839 b = NULL; 7840 } else { 7841 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7842 a = d->A; 7843 b = d->B; 7844 } 7845 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7846 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7847 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7848 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz, nmax = 0; 7849 const PetscInt *cols; 7850 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7851 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7852 nnz[brow / bs] = jj / bs; 7853 if (jj % bs) ok = 0; 7854 if (cols) j0 = cols[0]; 7855 else j0 = -1; 7856 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7857 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7858 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7859 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7860 if (jj % bs) ok = 0; 7861 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7862 if (nnz[brow / bs] != jj / bs) ok = 0; 7863 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7864 } 7865 if (!ok) { 7866 PetscCall(PetscFree2(d_nnz, o_nnz)); 7867 goto old_bs; 7868 } 7869 } 7870 PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax); 7871 } 7872 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7873 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7874 PetscCall(PetscFree2(d_nnz, o_nnz)); 7875 // diag 7876 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7877 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7878 ai = aseq->i; 7879 n = ai[brow + 1] - ai[brow]; 7880 aj = aseq->j + ai[brow]; 7881 for (int k = 0; k < n; k += bs) { // block columns 7882 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7883 val = 0; 7884 for (int ii = 0; ii < bs; ii++) { // rows in block 7885 aa = aseq->a + ai[brow + ii] + k; 7886 for (int jj = 0; jj < bs; jj++) { // columns in block 7887 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7888 } 7889 } 7890 AA[k / bs] = val; 7891 } 7892 grow = Istart / bs + brow / bs; 7893 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7894 } 7895 // off-diag 7896 if (ismpiaij) { 7897 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7898 
const PetscScalar *vals; 7899 const PetscInt *cols, *garray = aij->garray; 7900 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7901 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7902 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7903 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7904 AA[k / bs] = 0; 7905 AJ[cidx] = garray[cols[k]] / bs; 7906 } 7907 nc = ncols / bs; 7908 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7909 for (int ii = 0; ii < bs; ii++) { // rows in block 7910 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7911 for (int k = 0; k < ncols; k += bs) { 7912 for (int jj = 0; jj < bs; jj++) { // cols in block 7913 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7914 } 7915 } 7916 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7917 } 7918 grow = Istart / bs + brow / bs; 7919 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7920 } 7921 } 7922 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7923 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7924 } else { 7925 const PetscScalar *vals; 7926 const PetscInt *idx; 7927 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7928 old_bs: 7929 /* 7930 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7931 */ 7932 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7933 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7934 if (isseqaij) { 7935 PetscInt max_d_nnz; 7936 /* 7937 Determine exact preallocation count for (sequential) scalar matrix 7938 */ 7939 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7940 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7941 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7942 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7943 PetscCall(PetscFree3(w0, w1, w2)); 7944 } else if (ismpiaij) { 7945 Mat Daij, Oaij; 7946 const PetscInt *garray; 7947 PetscInt max_d_nnz; 7948 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7949 /* 7950 Determine exact preallocation count for diagonal block portion of scalar matrix 7951 */ 7952 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7953 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7954 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7955 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7956 PetscCall(PetscFree3(w0, w1, w2)); 7957 /* 7958 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7959 */ 7960 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7961 o_nnz[jj] = 0; 7962 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7963 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7964 o_nnz[jj] += ncols; 7965 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7966 } 7967 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7968 } 7969 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7970 /* get scalar copy (norms) of matrix */ 7971 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7972 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7973 PetscCall(PetscFree2(d_nnz, o_nnz)); 7974 for (Ii = Istart; Ii < Iend; Ii++) { 7975 
PetscInt dest_row = Ii / bs; 7976 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7977 for (jj = 0; jj < ncols; jj++) { 7978 PetscInt dest_col = idx[jj] / bs; 7979 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7980 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7981 } 7982 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7983 } 7984 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7985 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7986 } 7987 } else { 7988 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7989 else { 7990 Gmat = Amat; 7991 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7992 } 7993 if (isseqaij) { 7994 a = Gmat; 7995 b = NULL; 7996 } else { 7997 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7998 a = d->A; 7999 b = d->B; 8000 } 8001 if (filter >= 0 || scale) { 8002 /* take absolute value of each entry */ 8003 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8004 MatInfo info; 8005 PetscScalar *avals; 8006 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8007 PetscCall(MatSeqAIJGetArray(c, &avals)); 8008 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8009 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8010 } 8011 } 8012 } 8013 if (symmetrize) { 8014 PetscBool isset, issym; 8015 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8016 if (!isset || !issym) { 8017 Mat matTrans; 8018 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8019 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8020 PetscCall(MatDestroy(&matTrans)); 8021 } 8022 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8023 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8024 if (scale) { 8025 /* scale c for all diagonal values = 1 or -1 */ 8026 Vec diag; 8027 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8028 PetscCall(MatGetDiagonal(Gmat, diag)); 8029 PetscCall(VecReciprocal(diag)); 8030 PetscCall(VecSqrtAbs(diag)); 8031 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8032 PetscCall(VecDestroy(&diag)); 8033 } 8034 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8035 8036 if (filter >= 0) { 8037 Mat Fmat = NULL; /* some silly compiler needs this */ 8038 8039 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8040 PetscCall(MatDestroy(&Gmat)); 8041 Gmat = Fmat; 8042 } 8043 *a_Gmat = Gmat; 8044 PetscFunctionReturn(0); 8045 } 8046 8047 /* 8048 Special version for direct calls from Fortran 8049 */ 8050 #include <petsc/private/fortranimpl.h> 8051 8052 /* Change these macros so can be used in void function */ 8053 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8054 #undef PetscCall 8055 #define PetscCall(...) \ 8056 do { \ 8057 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8058 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8059 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8060 return; \ 8061 } \ 8062 } while (0) 8063 8064 #undef SETERRQ 8065 #define SETERRQ(comm, ierr, ...) 
  \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Select the linker symbol the Fortran compiler expects for this routine (all caps, or no trailing underscore) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
   matsetvaluesmpiaij_ - MatSetValues() specialized for MATMPIAIJ, callable directly from
   Fortran (see the "Special version for direct calls from Fortran" note above).

   All arguments arrive as pointers (Fortran pass-by-reference). Because the function
   returns void, errors are reported through *_ierr via the PetscCall()/SETERRQ() macros
   redefined above; on error the routine returns immediately with *_ierr set.

   Locally owned rows are inserted directly into the diagonal (aij->A) and off-diagonal
   (aij->B) sequential blocks through the MatSetValues_SeqAIJ_{A,B}_Private() macros;
   rows owned by other processes are placed in the stash for communication at assembly.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; later calls must not mix INSERT_VALUES with ADD_VALUES */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed/updated by MatSetValues_SeqAIJ_{A,B}_Private(); the "1"
       variables track the current row of the diagonal block, the "2" variables the
       off-diagonal block */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: prime the per-row pointers/counters for both blocks
           before scanning the columns */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v[] is read row-major or column-major depending on the matrix's roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* optionally drop explicit zeros when adding, but never on the diagonal entry */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column falls in this process's diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column falls in the off-diagonal block */
            if (mat->was_assembled) {
              /* after assembly, global column indices must be translated to local
                 off-diagonal indices through the colmap */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* column not in the colmap and new nonzeros are permitted: disassemble so
                   the new off-diagonal entry can be inserted with its global index */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash the values for exchange at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ