1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 10 { 11 Mat B; 12 13 PetscFunctionBegin; 14 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 15 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 16 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 17 PetscCall(MatDestroy(&B)); 18 PetscFunctionReturn(0); 19 } 20 21 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 22 { 23 Mat B; 24 25 PetscFunctionBegin; 26 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 27 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 28 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 29 PetscFunctionReturn(0); 30 } 31 32 /*MC 33 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 34 35 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 36 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 37 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 38 for communicators controlling multiple processes. It is recommended that you call both of 39 the above preallocation routines for simplicity. 40 41 Options Database Keys: 42 . 
-mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 43 44 Developer Note: 45 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 46 enough exist. 47 48 Level: beginner 49 50 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 51 M*/ 52 53 /*MC 54 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 55 56 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 57 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 58 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 59 for communicators controlling multiple processes. It is recommended that you call both of 60 the above preallocation routines for simplicity. 61 62 Options Database Keys: 63 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 64 65 Level: beginner 66 67 .seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 68 M*/ 69 70 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 71 { 72 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 73 74 PetscFunctionBegin; 75 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 76 A->boundtocpu = flg; 77 #endif 78 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 79 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 80 81 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 82 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 83 * to differ from the parent matrix. 
*/ 84 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 85 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 86 87 PetscFunctionReturn(0); 88 } 89 90 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 91 { 92 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 93 94 PetscFunctionBegin; 95 if (mat->A) { 96 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 97 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 98 } 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 103 { 104 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 105 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 106 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 107 const PetscInt *ia, *ib; 108 const MatScalar *aa, *bb, *aav, *bav; 109 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 110 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 111 112 PetscFunctionBegin; 113 *keptrows = NULL; 114 115 ia = a->i; 116 ib = b->i; 117 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 118 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 119 for (i = 0; i < m; i++) { 120 na = ia[i + 1] - ia[i]; 121 nb = ib[i + 1] - ib[i]; 122 if (!na && !nb) { 123 cnt++; 124 goto ok1; 125 } 126 aa = aav + ia[i]; 127 for (j = 0; j < na; j++) { 128 if (aa[j] != 0.0) goto ok1; 129 } 130 bb = bav + ib[i]; 131 for (j = 0; j < nb; j++) { 132 if (bb[j] != 0.0) goto ok1; 133 } 134 cnt++; 135 ok1:; 136 } 137 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 138 if (!n0rows) { 139 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 140 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 141 PetscFunctionReturn(0); 142 } 143 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 144 cnt = 0; 145 for (i = 0; i < m; i++) { 146 na = ia[i + 1] - ia[i]; 147 nb = ib[i + 1] - ib[i]; 148 if (!na && !nb) continue; 149 aa = aav + ia[i]; 150 for (j = 0; j < na; j++) { 151 if (aa[j] != 0.0) { 152 rows[cnt++] = rstart + i; 153 goto ok2; 154 } 155 } 156 bb = bav + ib[i]; 157 for 
(j = 0; j < nb; j++) { 158 if (bb[j] != 0.0) { 159 rows[cnt++] = rstart + i; 160 goto ok2; 161 } 162 } 163 ok2:; 164 } 165 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 166 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 167 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 172 { 173 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 174 PetscBool cong; 175 176 PetscFunctionBegin; 177 PetscCall(MatHasCongruentLayouts(Y, &cong)); 178 if (Y->assembled && cong) { 179 PetscCall(MatDiagonalSet(aij->A, D, is)); 180 } else { 181 PetscCall(MatDiagonalSet_Default(Y, D, is)); 182 } 183 PetscFunctionReturn(0); 184 } 185 186 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 187 { 188 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 189 PetscInt i, rstart, nrows, *rows; 190 191 PetscFunctionBegin; 192 *zrows = NULL; 193 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 194 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 195 for (i = 0; i < nrows; i++) rows[i] += rstart; 196 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 201 { 202 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 203 PetscInt i, m, n, *garray = aij->garray; 204 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 205 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 206 PetscReal *work; 207 const PetscScalar *dummy; 208 209 PetscFunctionBegin; 210 PetscCall(MatGetSize(A, &m, &n)); 211 PetscCall(PetscCalloc1(n, &work)); 212 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 213 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 214 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 215 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 216 if (type == NORM_2) { 
217 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 218 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 219 } else if (type == NORM_1) { 220 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 221 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 222 } else if (type == NORM_INFINITY) { 223 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 224 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 225 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 226 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 227 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 228 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 229 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 230 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 231 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 232 if (type == NORM_INFINITY) { 233 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 234 } else { 235 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 236 } 237 PetscCall(PetscFree(work)); 238 if (type == NORM_2) { 239 for (i = 0; i < n; i++) reductions[i] = 
PetscSqrtReal(reductions[i]); 240 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 241 for (i = 0; i < n; i++) reductions[i] /= m; 242 } 243 PetscFunctionReturn(0); 244 } 245 246 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 247 { 248 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 249 IS sis, gis; 250 const PetscInt *isis, *igis; 251 PetscInt n, *iis, nsis, ngis, rstart, i; 252 253 PetscFunctionBegin; 254 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 255 PetscCall(MatFindNonzeroRows(a->B, &gis)); 256 PetscCall(ISGetSize(gis, &ngis)); 257 PetscCall(ISGetSize(sis, &nsis)); 258 PetscCall(ISGetIndices(sis, &isis)); 259 PetscCall(ISGetIndices(gis, &igis)); 260 261 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 262 PetscCall(PetscArraycpy(iis, igis, ngis)); 263 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 264 n = ngis + nsis; 265 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 266 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 267 for (i = 0; i < n; i++) iis[i] += rstart; 268 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 269 270 PetscCall(ISRestoreIndices(sis, &isis)); 271 PetscCall(ISRestoreIndices(gis, &igis)); 272 PetscCall(ISDestroy(&sis)); 273 PetscCall(ISDestroy(&gis)); 274 PetscFunctionReturn(0); 275 } 276 277 /* 278 Local utility routine that creates a mapping from the global column 279 number to the local number in the off-diagonal part of the local 280 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 281 a slightly higher hash table cost; without it it is not scalable (each processor 282 has an order N integer array but is fast to access. 
283 */ 284 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 285 { 286 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 287 PetscInt n = aij->B->cmap->n, i; 288 289 PetscFunctionBegin; 290 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 291 #if defined(PETSC_USE_CTABLE) 292 PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap)); 293 for (i = 0; i < n; i++) PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES)); 294 #else 295 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 296 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 297 #endif 298 PetscFunctionReturn(0); 299 } 300 301 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 302 { \ 303 if (col <= lastcol1) low1 = 0; \ 304 else high1 = nrow1; \ 305 lastcol1 = col; \ 306 while (high1 - low1 > 5) { \ 307 t = (low1 + high1) / 2; \ 308 if (rp1[t] > col) high1 = t; \ 309 else low1 = t; \ 310 } \ 311 for (_i = low1; _i < high1; _i++) { \ 312 if (rp1[_i] > col) break; \ 313 if (rp1[_i] == col) { \ 314 if (addv == ADD_VALUES) { \ 315 ap1[_i] += value; \ 316 /* Not sure LogFlops will slow dow the code or not */ \ 317 (void)PetscLogFlops(1.0); \ 318 } else ap1[_i] = value; \ 319 goto a_noinsert; \ 320 } \ 321 } \ 322 if (value == 0.0 && ignorezeroentries && row != col) { \ 323 low1 = 0; \ 324 high1 = nrow1; \ 325 goto a_noinsert; \ 326 } \ 327 if (nonew == 1) { \ 328 low1 = 0; \ 329 high1 = nrow1; \ 330 goto a_noinsert; \ 331 } \ 332 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 333 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 334 N = nrow1++ - 1; \ 335 a->nz++; \ 336 high1++; \ 337 /* shift up all the later entries in this row */ \ 338 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N 
- _i + 1)); \ 339 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 340 rp1[_i] = col; \ 341 ap1[_i] = value; \ 342 A->nonzerostate++; \ 343 a_noinsert:; \ 344 ailen[row] = nrow1; \ 345 } 346 347 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 348 { \ 349 if (col <= lastcol2) low2 = 0; \ 350 else high2 = nrow2; \ 351 lastcol2 = col; \ 352 while (high2 - low2 > 5) { \ 353 t = (low2 + high2) / 2; \ 354 if (rp2[t] > col) high2 = t; \ 355 else low2 = t; \ 356 } \ 357 for (_i = low2; _i < high2; _i++) { \ 358 if (rp2[_i] > col) break; \ 359 if (rp2[_i] == col) { \ 360 if (addv == ADD_VALUES) { \ 361 ap2[_i] += value; \ 362 (void)PetscLogFlops(1.0); \ 363 } else ap2[_i] = value; \ 364 goto b_noinsert; \ 365 } \ 366 } \ 367 if (value == 0.0 && ignorezeroentries) { \ 368 low2 = 0; \ 369 high2 = nrow2; \ 370 goto b_noinsert; \ 371 } \ 372 if (nonew == 1) { \ 373 low2 = 0; \ 374 high2 = nrow2; \ 375 goto b_noinsert; \ 376 } \ 377 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 378 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 379 N = nrow2++ - 1; \ 380 b->nz++; \ 381 high2++; \ 382 /* shift up all the later entries in this row */ \ 383 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 384 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 385 rp2[_i] = col; \ 386 ap2[_i] = value; \ 387 B->nonzerostate++; \ 388 b_noinsert:; \ 389 bilen[row] = nrow2; \ 390 } 391 392 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 393 { 394 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 395 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 396 PetscInt l, *garray = mat->garray, diag; 397 PetscScalar *aa, *ba; 398 399 PetscFunctionBegin; 400 /* code only works for square matrices 
A */ 401 402 /* find size of row to the left of the diagonal part */ 403 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 404 row = row - diag; 405 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 406 if (garray[b->j[b->i[row] + l]] > diag) break; 407 } 408 if (l) { 409 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 410 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 411 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 412 } 413 414 /* diagonal part */ 415 if (a->i[row + 1] - a->i[row]) { 416 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 417 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 418 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 419 } 420 421 /* right of diagonal part */ 422 if (b->i[row + 1] - b->i[row] - l) { 423 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 424 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 425 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 431 { 432 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 433 PetscScalar value = 0.0; 434 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 435 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 436 PetscBool roworiented = aij->roworiented; 437 438 /* Some Variables required in the macro */ 439 Mat A = aij->A; 440 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 441 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 442 PetscBool ignorezeroentries = a->ignorezeroentries; 443 Mat B = aij->B; 444 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 445 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 446 MatScalar *aa, *ba; 447 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 
448 PetscInt nonew; 449 MatScalar *ap1, *ap2; 450 451 PetscFunctionBegin; 452 PetscCall(MatSeqAIJGetArray(A, &aa)); 453 PetscCall(MatSeqAIJGetArray(B, &ba)); 454 for (i = 0; i < m; i++) { 455 if (im[i] < 0) continue; 456 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 457 if (im[i] >= rstart && im[i] < rend) { 458 row = im[i] - rstart; 459 lastcol1 = -1; 460 rp1 = aj + ai[row]; 461 ap1 = aa + ai[row]; 462 rmax1 = aimax[row]; 463 nrow1 = ailen[row]; 464 low1 = 0; 465 high1 = nrow1; 466 lastcol2 = -1; 467 rp2 = bj + bi[row]; 468 ap2 = ba + bi[row]; 469 rmax2 = bimax[row]; 470 nrow2 = bilen[row]; 471 low2 = 0; 472 high2 = nrow2; 473 474 for (j = 0; j < n; j++) { 475 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 476 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 477 if (in[j] >= cstart && in[j] < cend) { 478 col = in[j] - cstart; 479 nonew = a->nonew; 480 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 481 } else if (in[j] < 0) { 482 continue; 483 } else { 484 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 485 if (mat->was_assembled) { 486 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 487 #if defined(PETSC_USE_CTABLE) 488 PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */ 489 col--; 490 #else 491 col = aij->colmap[in[j]] - 1; 492 #endif 493 if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 494 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 495 col = in[j]; 496 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 497 B = aij->B; 498 b = (Mat_SeqAIJ *)B->data; 499 bimax 
= b->imax; 500 bi = b->i; 501 bilen = b->ilen; 502 bj = b->j; 503 ba = b->a; 504 rp2 = bj + bi[row]; 505 ap2 = ba + bi[row]; 506 rmax2 = bimax[row]; 507 nrow2 = bilen[row]; 508 low2 = 0; 509 high2 = nrow2; 510 bm = aij->B->rmap->n; 511 ba = b->a; 512 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 513 if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) { 514 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 515 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 516 } 517 } else col = in[j]; 518 nonew = b->nonew; 519 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 520 } 521 } 522 } else { 523 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 524 if (!aij->donotstash) { 525 mat->assembled = PETSC_FALSE; 526 if (roworiented) { 527 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 528 } else { 529 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 530 } 531 } 532 } 533 } 534 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 535 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 536 PetscFunctionReturn(0); 537 } 538 539 /* 540 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 541 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
542 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 543 */ 544 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 545 { 546 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 547 Mat A = aij->A; /* diagonal part of the matrix */ 548 Mat B = aij->B; /* offdiagonal part of the matrix */ 549 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 550 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 551 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 552 PetscInt *ailen = a->ilen, *aj = a->j; 553 PetscInt *bilen = b->ilen, *bj = b->j; 554 PetscInt am = aij->A->rmap->n, j; 555 PetscInt diag_so_far = 0, dnz; 556 PetscInt offd_so_far = 0, onz; 557 558 PetscFunctionBegin; 559 /* Iterate over all rows of the matrix */ 560 for (j = 0; j < am; j++) { 561 dnz = onz = 0; 562 /* Iterate over all non-zero columns of the current row */ 563 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 564 /* If column is in the diagonal */ 565 if (mat_j[col] >= cstart && mat_j[col] < cend) { 566 aj[diag_so_far++] = mat_j[col] - cstart; 567 dnz++; 568 } else { /* off-diagonal entries */ 569 bj[offd_so_far++] = mat_j[col]; 570 onz++; 571 } 572 } 573 ailen[j] = dnz; 574 bilen[j] = onz; 575 } 576 PetscFunctionReturn(0); 577 } 578 579 /* 580 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 581 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 582 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 583 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 584 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
585 */ 586 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 587 { 588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 589 Mat A = aij->A; /* diagonal part of the matrix */ 590 Mat B = aij->B; /* offdiagonal part of the matrix */ 591 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 593 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 594 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 595 PetscInt *ailen = a->ilen, *aj = a->j; 596 PetscInt *bilen = b->ilen, *bj = b->j; 597 PetscInt am = aij->A->rmap->n, j; 598 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 599 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 600 PetscScalar *aa = a->a, *ba = b->a; 601 602 PetscFunctionBegin; 603 /* Iterate over all rows of the matrix */ 604 for (j = 0; j < am; j++) { 605 dnz_row = onz_row = 0; 606 rowstart_offd = full_offd_i[j]; 607 rowstart_diag = full_diag_i[j]; 608 /* Iterate over all non-zero columns of the current row */ 609 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 610 /* If column is in the diagonal */ 611 if (mat_j[col] >= cstart && mat_j[col] < cend) { 612 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 613 aa[rowstart_diag + dnz_row] = mat_a[col]; 614 dnz_row++; 615 } else { /* off-diagonal entries */ 616 bj[rowstart_offd + onz_row] = mat_j[col]; 617 ba[rowstart_offd + onz_row] = mat_a[col]; 618 onz_row++; 619 } 620 } 621 ailen[j] = dnz_row; 622 bilen[j] = onz_row; 623 } 624 PetscFunctionReturn(0); 625 } 626 627 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 628 { 629 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 630 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 631 PetscInt cstart = 
mat->cmap->rstart, cend = mat->cmap->rend, row, col; 632 633 PetscFunctionBegin; 634 for (i = 0; i < m; i++) { 635 if (idxm[i] < 0) continue; /* negative row */ 636 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 637 if (idxm[i] >= rstart && idxm[i] < rend) { 638 row = idxm[i] - rstart; 639 for (j = 0; j < n; j++) { 640 if (idxn[j] < 0) continue; /* negative column */ 641 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 642 if (idxn[j] >= cstart && idxn[j] < cend) { 643 col = idxn[j] - cstart; 644 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 645 } else { 646 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 647 #if defined(PETSC_USE_CTABLE) 648 PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col)); 649 col--; 650 #else 651 col = aij->colmap[idxn[j]] - 1; 652 #endif 653 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 654 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 655 } 656 } 657 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 658 } 659 PetscFunctionReturn(0); 660 } 661 662 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 663 { 664 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 665 PetscInt nstash, reallocs; 666 667 PetscFunctionBegin; 668 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 669 670 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 671 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 672 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 673 PetscFunctionReturn(0); 674 } 675 676 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, 
MatAssemblyType mode) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 679 PetscMPIInt n; 680 PetscInt i, j, rstart, ncols, flg; 681 PetscInt *row, *col; 682 PetscBool other_disassembled; 683 PetscScalar *val; 684 685 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 686 687 PetscFunctionBegin; 688 if (!aij->donotstash && !mat->nooffprocentries) { 689 while (1) { 690 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 691 if (!flg) break; 692 693 for (i = 0; i < n;) { 694 /* Now identify the consecutive vals belonging to the same row */ 695 for (j = i, rstart = row[j]; j < n; j++) { 696 if (row[j] != rstart) break; 697 } 698 if (j < n) ncols = j - i; 699 else ncols = n - i; 700 /* Now assemble all these values with a single function call */ 701 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 702 i = j; 703 } 704 } 705 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 706 } 707 #if defined(PETSC_HAVE_DEVICE) 708 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 709 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 710 if (mat->boundtocpu) { 711 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 712 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 713 } 714 #endif 715 PetscCall(MatAssemblyBegin(aij->A, mode)); 716 PetscCall(MatAssemblyEnd(aij->A, mode)); 717 718 /* determine if any processor has disassembled, if so we must 719 also disassemble ourself, in order that we may reassemble. 
*/ 720 /* 721 if nonzero structure of submatrix B cannot change then we know that 722 no processor disassembled thus we can skip this stuff 723 */ 724 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 725 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 726 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globaly it does not */ 727 PetscCall(MatDisAssemble_MPIAIJ(mat)); 728 } 729 } 730 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 731 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 732 #if defined(PETSC_HAVE_DEVICE) 733 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 734 #endif 735 PetscCall(MatAssemblyBegin(aij->B, mode)); 736 PetscCall(MatAssemblyEnd(aij->B, mode)); 737 738 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 739 740 aij->rowvalues = NULL; 741 742 PetscCall(VecDestroy(&aij->diag)); 743 744 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 745 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 746 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 747 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 748 } 749 #if defined(PETSC_HAVE_DEVICE) 750 mat->offloadmask = PETSC_OFFLOAD_BOTH; 751 #endif 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 756 { 757 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 758 759 PetscFunctionBegin; 760 PetscCall(MatZeroEntries(l->A)); 761 PetscCall(MatZeroEntries(l->B)); 762 PetscFunctionReturn(0); 763 } 764 765 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, 
Vec b) 766 { 767 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 768 PetscObjectState sA, sB; 769 PetscInt *lrows; 770 PetscInt r, len; 771 PetscBool cong, lch, gch; 772 773 PetscFunctionBegin; 774 /* get locally owned rows */ 775 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 776 PetscCall(MatHasCongruentLayouts(A, &cong)); 777 /* fix right hand side if needed */ 778 if (x && b) { 779 const PetscScalar *xx; 780 PetscScalar *bb; 781 782 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 783 PetscCall(VecGetArrayRead(x, &xx)); 784 PetscCall(VecGetArray(b, &bb)); 785 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 786 PetscCall(VecRestoreArrayRead(x, &xx)); 787 PetscCall(VecRestoreArray(b, &bb)); 788 } 789 790 sA = mat->A->nonzerostate; 791 sB = mat->B->nonzerostate; 792 793 if (diag != 0.0 && cong) { 794 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 795 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 796 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 797 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 798 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 799 PetscInt nnwA, nnwB; 800 PetscBool nnzA, nnzB; 801 802 nnwA = aijA->nonew; 803 nnwB = aijB->nonew; 804 nnzA = aijA->keepnonzeropattern; 805 nnzB = aijB->keepnonzeropattern; 806 if (!nnzA) { 807 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 808 aijA->nonew = 0; 809 } 810 if (!nnzB) { 811 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 812 aijB->nonew = 0; 813 } 814 /* Must zero here before the next loop */ 815 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 816 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 817 for 
(r = 0; r < len; ++r) { /* insert the requested diagonal value into each zeroed row (global insert: the diagonal entry may live off-process) */
    const PetscInt row = lrows[r] + A->rmap->rstart;
    if (row >= A->cmap->N) continue; /* rectangular case: row has no diagonal entry */
    PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
  }
  /* restore the original insertion policy of both blocks */
  aijA->nonew = nnwA;
  aijB->nonew = nnwB;
} else {
  /* diag == 0.0: just zero the rows of both the diagonal and off-diagonal blocks */
  PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
}
PetscCall(PetscFree(lrows));
PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

/* reduce nonzerostate: bump the parallel state only if any rank's block pattern changed */
lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
if (gch) A->nonzerostate++;
PetscFunctionReturn(0);
}

/* Zero the given global rows AND the matching columns of a parallel AIJ matrix,
   optionally placing diag on the diagonal of the zeroed rows; when both x and b are
   given, b is adjusted so the solution of the modified system keeps x's values in
   the zeroed positions (requires congruent row/column layouts, checked below). */
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "not requested"; reduce below overwrites flagged rows */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix: build a mask vector that is 1 in every zeroed
     column, scattered to the ghost layout of the off-diagonal block */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* compressed storage: ridx maps compressed row -> actual local row */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) { /* this column was zeroed somewhere */
          if (b) bb[*ridx] -= *aa * xx[*aj]; /* move known solution contribution to rhs */
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* yy = A*xx: overlap the ghost-value scatter with the local diagonal-block multiply,
   then add the off-diagonal block's contribution from the ghost values. */
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx,
a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); /* yy += B*lvec (off-diagonal contribution) */
  PetscFunctionReturn(0);
}

/* Apply only the local diagonal block: xx = diag(A)_block * bb. */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(0);
}

/* zz = yy + A*xx: same communication pattern as MatMult_MPIAIJ, accumulating into zz. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/* yy = A^T*xx: local transposed multiplies first, then a reverse scatter sums the
   off-diagonal partial results into the owning ranks. */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/* Test whether Bmat equals Amat^T to tolerance tol; result (collective) returned in *f. */
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat         Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1026 if (!*f) PetscFunctionReturn(0); 1027 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1028 PetscCallMPI(MPI_Comm_size(comm, &size)); 1029 if (size == 1) PetscFunctionReturn(0); 1030 1031 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1032 PetscCall(MatGetSize(Amat, &M, &N)); 1033 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1034 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1035 for (i = 0; i < first; i++) notme[i] = i; 1036 for (i = last; i < M; i++) notme[i - last + first] = i; 1037 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1038 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1039 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1040 Aoff = Aoffs[0]; 1041 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1042 Boff = Boffs[0]; 1043 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1044 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1045 PetscCall(MatDestroyMatrices(1, &Boffs)); 1046 PetscCall(ISDestroy(&Me)); 1047 PetscCall(ISDestroy(&Notme)); 1048 PetscCall(PetscFree(notme)); 1049 PetscFunctionReturn(0); 1050 } 1051 1052 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1053 { 1054 PetscFunctionBegin; 1055 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1056 PetscFunctionReturn(0); 1057 } 1058 1059 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1060 { 1061 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1062 1063 PetscFunctionBegin; 1064 /* do nondiagonal part */ 1065 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1066 /* do local part */ 1067 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1068 /* add partial results together */ 1069 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1070 
PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(0);
}

/* Scale both local blocks by aa: A *= aa. */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* pass-1 arrays */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* pass-2 arrays */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  /* communication buffers */
  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(0);
}

/* Destroy a parallel AIJ matrix: free both blocks, the communication machinery,
   the COO state, and detach every composed function/object. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  /* detach every composed method so dangling pointers cannot survive the type */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is detached a second time here (also above);
     harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(0);
}

/* Write the parallel matrix to a binary viewer in the PETSc binary matrix format:
   header, per-row lengths, global column indices, then values; each row is emitted
   with its off-diagonal entries merged around the diagonal block in column order. */
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count; global total reduced onto rank 0 below */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices: off-diagonal entries left of the diagonal
     block, then the diagonal block, then the remaining off-diagonal entries */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same interleaving as the column indices) */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
/* Dispatch viewing of a parallel AIJ matrix for ASCII, draw, binary, and socket
   viewers; summary formats are handled in place, full output gathers the matrix
   onto rank 0. */
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format ==
PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across all ranks */
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank synchronized summary: local sizes, nz counts, inode usage */
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns, everyone else requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat      *AA, A = NULL, Av;
      IS       isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A    = AA[0];
        Av   = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Viewer entry point for MPIAIJ: route supported viewer types to the worker above. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(0);
}

/* Local (block Jacobi style) SOR relaxation: each sweep scatters the current
   iterate's ghost values, subtracts the off-diagonal contribution from the rhs,
   and runs the requested sweep on the local diagonal block. Truly parallel SOR
   is not supported. */
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* a work rhs is needed whenever a sweep will use a nonzero iterate */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) { /* lazily cache the diagonal for the pointwise multiply below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Permute rows and columns of a parallel AIJ matrix: B = A(rowp,colp). Inverts both
   permutations with PetscSF reductions to find destination rows/columns, counts the
   permuted diagonal/off-diagonal nonzeros for preallocation, then inserts the values. */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count permuted diagonal (dnnz) and off-diagonal (onnz) nonzeros per source row */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* move the counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost (off-process) columns and, optionally, their global
   indices (the garray of the off-diagonal block). The array is owned by the matrix. */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Mat         A = mat->A, B
= mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* accumulate the diagonal block's counters ... */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* ... and add the off-diagonal block's */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Dispatches a MatOption: most options are forwarded to both sequential blocks,
   a few are recorded on the parallel object itself, and the symmetry flags are
   handled entirely by the generic MatSetOption(). */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to the diagonal and off-diagonal blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* Returns one locally owned row of the parallel matrix with global column indices,
   merging the diagonal and off-diagonal blocks in increasing column order.
   Only rows in [rstart, rend) may be requested; MatRestoreRow() must follow. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough
space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    /* scan all local rows once to size the scratch buffers */
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* v was not requested, so imark has not been computed yet */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* Releases the row obtained by MatGetRow_MPIAIJ(); only clears the active flag,
   since the scratch buffers are owned by the matrix and reused. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Computes the Frobenius, 1- (max column sum) or infinity- (max row sum) norm of a
   parallel AIJ matrix by combining the two sequential blocks and reducing across
   the communicator.  The two norm is not supported. */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, then a global sum and square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      /* accumulate |a_ij| into a global-length column-sum array */
      v  = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++)
      {
        /* off-diagonal columns are compressed; garray maps back to global columns */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/* Forms the transpose of a parallel AIJ matrix.  For MAT_INITIAL_MATRIX (or
   in-place) the result is preallocated by counting column occurrences locally and
   reducing the off-diagonal counts onto the owning processes via a PetscSF; the
   diagonal block is transposed locally while off-diagonal entries are shipped with
   MatSetValues(). */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix.
In contrast to the offdiagonal part, this can be done
  very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate compressed off-diagonal column indices to global indices */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* each source row becomes a column of the transpose: insert it as ncol rows x 1 column */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

/* Computes mat = diag(ll) * mat * diag(rr).  The right scaling of the off-diagonal
   block needs the ghosted entries of rr, which are gathered with the matrix's
   scatter context while the local blocks are scaled. */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/* Clears the factored state of the (local) diagonal block. */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Tests whether two parallel AIJ matrices are equal by comparing the local blocks
   and combining the per-process results with a logical-AND reduction. */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copies A into B, using the fast block-wise copy only when the nonzero patterns
   match and both matrices share the same copy implementation. */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default sizes when the user never called a
   preallocation routine. */
PetscErrorCode MatSetUp_MPIAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
     Computes the number of nonzeros per row needed for preallocation when X and Y
     have different nonzero structure.
*/
/* Merge-count of the union of the column patterns of X and Y per row; the ltog
   arrays map each matrix's (possibly compressed) column indices to global columns
   so rows can be merged in global order. */
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N; /* Y is a sequential (local) block, so N is the local row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the blocks are combined
   directly; with SUBSET_NONZERO_PATTERN the generic basic path is used; otherwise
   a new matrix with the union pattern is preallocated and Y is replaced by it
   (continues past this chunk boundary). */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,
&nnz_o)); 2135 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2136 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2137 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2138 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2139 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2140 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2141 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2142 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2143 PetscCall(MatHeaderMerge(Y, &B)); 2144 PetscCall(PetscFree(nnz_d)); 2145 PetscCall(PetscFree(nnz_o)); 2146 } 2147 PetscFunctionReturn(0); 2148 } 2149 2150 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2151 2152 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2153 { 2154 PetscFunctionBegin; 2155 if (PetscDefined(USE_COMPLEX)) { 2156 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2157 2158 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2159 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2160 } 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2165 { 2166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2167 2168 PetscFunctionBegin; 2169 PetscCall(MatRealPart(a->A)); 2170 PetscCall(MatRealPart(a->B)); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2175 { 2176 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2177 2178 PetscFunctionBegin; 2179 PetscCall(MatImaginaryPart(a->A)); 2180 PetscCall(MatImaginaryPart(a->B)); 2181 PetscFunctionReturn(0); 2182 } 2183 2184 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2185 { 2186 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2187 PetscInt i, *idxb = NULL, m = A->rmap->n; 2188 PetscScalar *va, *vv; 2189 Vec vB, vA; 2190 const PetscScalar *vb; 2191 2192 PetscFunctionBegin; 2193 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2194 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2195 2196 PetscCall(VecGetArrayWrite(vA, &va)); 
2197 if (idx) { 2198 for (i = 0; i < m; i++) { 2199 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2200 } 2201 } 2202 2203 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2204 PetscCall(PetscMalloc1(m, &idxb)); 2205 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2206 2207 PetscCall(VecGetArrayWrite(v, &vv)); 2208 PetscCall(VecGetArrayRead(vB, &vb)); 2209 for (i = 0; i < m; i++) { 2210 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2211 vv[i] = vb[i]; 2212 if (idx) idx[i] = a->garray[idxb[i]]; 2213 } else { 2214 vv[i] = va[i]; 2215 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2216 } 2217 } 2218 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2219 PetscCall(VecRestoreArrayWrite(vA, &va)); 2220 PetscCall(VecRestoreArrayRead(vB, &vb)); 2221 PetscCall(PetscFree(idxb)); 2222 PetscCall(VecDestroy(&vA)); 2223 PetscCall(VecDestroy(&vB)); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2228 { 2229 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2230 PetscInt m = A->rmap->n, n = A->cmap->n; 2231 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2232 PetscInt *cmap = mat->garray; 2233 PetscInt *diagIdx, *offdiagIdx; 2234 Vec diagV, offdiagV; 2235 PetscScalar *a, *diagA, *offdiagA; 2236 const PetscScalar *ba, *bav; 2237 PetscInt r, j, col, ncols, *bi, *bj; 2238 Mat B = mat->B; 2239 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2240 2241 PetscFunctionBegin; 2242 /* When a process holds entire A and other processes have no entry */ 2243 if (A->cmap->N == n) { 2244 PetscCall(VecGetArrayWrite(v, &diagA)); 2245 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2246 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2247 PetscCall(VecDestroy(&diagV)); 2248 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2249 PetscFunctionReturn(0); 2250 } else if (n == 0) { 2251 if (m) { 2252 PetscCall(VecGetArrayWrite(v, &a)); 2253 
for (r = 0; r < m; r++) {
        /* no local columns at all: every row minimum is the implicit zero */
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit zero against the stored off-diagonal entries */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results; ties prefer the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, finds the minimum entry (by real part), treating columns not
   stored in the compressed off-diagonal block as implicit zeros (continues past
   this chunk boundary). */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      /* no local columns: report the identity of the min operation */
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit zero against the stored off-diagonal entries (by real part) */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal results; ties prefer the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, finds the maximum entry (by real part), treating columns not
   stored in the compressed off-diagonal block as implicit zeros (continues past
   this chunk boundary). */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      /* no local columns: report the identity of the max operation */
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2496 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2497 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2498 2499 /* Get offdiagIdx[] for implicit 0.0 */ 2500 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2501 ba = bav; 2502 bi = b->i; 2503 bj = b->j; 2504 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2505 for (r = 0; r < m; r++) { 2506 ncols = bi[r + 1] - bi[r]; 2507 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2508 offdiagA[r] = *ba; 2509 offdiagIdx[r] = cmap[0]; 2510 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2511 offdiagA[r] = 0.0; 2512 2513 /* Find first hole in the cmap */ 2514 for (j = 0; j < ncols; j++) { 2515 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2516 if (col > j && j < cstart) { 2517 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2518 break; 2519 } else if (col > j + n && j >= cstart) { 2520 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2521 break; 2522 } 2523 } 2524 if (j == ncols && ncols < A->cmap->N - n) { 2525 /* a hole is outside compressed Bcols */ 2526 if (ncols == 0) { 2527 if (cstart) { 2528 offdiagIdx[r] = 0; 2529 } else offdiagIdx[r] = cend; 2530 } else { /* ncols > 0 */ 2531 offdiagIdx[r] = cmap[ncols - 1] + 1; 2532 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2533 } 2534 } 2535 } 2536 2537 for (j = 0; j < ncols; j++) { 2538 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2539 offdiagA[r] = *ba; 2540 offdiagIdx[r] = cmap[*bj]; 2541 } 2542 ba++; 2543 bj++; 2544 } 2545 } 2546 2547 PetscCall(VecGetArrayWrite(v, &a)); 2548 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2549 for (r = 0; r < m; ++r) { 2550 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2551 a[r] = diagA[r]; 2552 if (idx) idx[r] = cstart + diagIdx[r]; 2553 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2554 a[r] = diagA[r]; 2555 if (idx) { 2556 
if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2557 idx[r] = cstart + diagIdx[r]; 2558 } else idx[r] = offdiagIdx[r]; 2559 } 2560 } else { 2561 a[r] = offdiagA[r]; 2562 if (idx) idx[r] = offdiagIdx[r]; 2563 } 2564 } 2565 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2566 PetscCall(VecRestoreArrayWrite(v, &a)); 2567 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2568 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2569 PetscCall(VecDestroy(&diagV)); 2570 PetscCall(VecDestroy(&offdiagV)); 2571 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2576 { 2577 Mat *dummy; 2578 2579 PetscFunctionBegin; 2580 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2581 *newmat = *dummy; 2582 PetscCall(PetscFree(dummy)); 2583 PetscFunctionReturn(0); 2584 } 2585 2586 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2587 { 2588 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2589 2590 PetscFunctionBegin; 2591 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2592 A->factorerrortype = a->A->factorerrortype; 2593 PetscFunctionReturn(0); 2594 } 2595 2596 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2597 { 2598 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2599 2600 PetscFunctionBegin; 2601 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2602 PetscCall(MatSetRandom(aij->A, rctx)); 2603 if (x->assembled) { 2604 PetscCall(MatSetRandom(aij->B, rctx)); 2605 } else { 2606 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2607 } 2608 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2609 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2610 PetscFunctionReturn(0); 2611 } 2612 2613 
/* Implementation behind MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the
   increaseoverlap function pointer between the scalable and default algorithms */
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: `MATMPIAIJ`, `Mat`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* sum of the CSR row-offset totals of the diagonal and off-diagonal blocks */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective on A

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

/* Processes the MPIAIJ-specific options database key -mat_increase_overlap_scalable */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed overlap algorithm */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y = Y + a*I. Ensures the diagonal block has at least a 1-per-row preallocation
   before delegating to MatShift_Basic() */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* preserve the no-new-nonzero policy across the re-preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Reports whether the (square) matrix is missing a diagonal entry; the local row
   index from the diagonal block is converted to a global index via rstart */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart;
  }
  PetscFunctionReturn(0);
}

/* Variable-block diagonal inversion; the blocks lie entirely in the diagonal part a->A */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(0);
}

/* Removes explicitly stored zeros from both the diagonal and off-diagonal blocks */
PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros(a->A));
  PetscCall(MatEliminateZeros(a->B));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps
/* Virtual function table for MATMPIAIJ. Entries are positional: the numeric
   comments give the slot index within struct _MatOps; NULL entries are slots
   this matrix type does not implement. */
MatOps_Values = {MatSetValues_MPIAIJ,
                 MatGetRow_MPIAIJ,
                 MatRestoreRow_MPIAIJ,
                 MatMult_MPIAIJ,
                 /* 4*/ MatMultAdd_MPIAIJ,
                 MatMultTranspose_MPIAIJ,
                 MatMultTransposeAdd_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 /*10*/ NULL,
                 NULL,
                 NULL,
                 MatSOR_MPIAIJ,
                 MatTranspose_MPIAIJ,
                 /*15*/ MatGetInfo_MPIAIJ,
                 MatEqual_MPIAIJ,
                 MatGetDiagonal_MPIAIJ,
                 MatDiagonalScale_MPIAIJ,
                 MatNorm_MPIAIJ,
                 /*20*/ MatAssemblyBegin_MPIAIJ,
                 MatAssemblyEnd_MPIAIJ,
                 MatSetOption_MPIAIJ,
                 MatZeroEntries_MPIAIJ,
                 /*24*/ MatZeroRows_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 NULL,
                 /*29*/ MatSetUp_MPIAIJ,
                 NULL,
                 NULL,
                 MatGetDiagonalBlock_MPIAIJ,
                 NULL,
                 /*34*/ MatDuplicate_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 NULL,
                 /*39*/ MatAXPY_MPIAIJ,
                 MatCreateSubMatrices_MPIAIJ,
                 MatIncreaseOverlap_MPIAIJ,
                 MatGetValues_MPIAIJ,
                 MatCopy_MPIAIJ,
                 /*44*/ MatGetRowMax_MPIAIJ,
                 MatScale_MPIAIJ,
                 MatShift_MPIAIJ,
                 MatDiagonalSet_MPIAIJ,
                 MatZeroRowsColumns_MPIAIJ,
                 /*49*/ MatSetRandom_MPIAIJ,
                 MatGetRowIJ_MPIAIJ,
                 MatRestoreRowIJ_MPIAIJ,
                 NULL,
                 NULL,
                 /*54*/ MatFDColoringCreate_MPIXAIJ,
                 NULL,
                 MatSetUnfactored_MPIAIJ,
                 MatPermute_MPIAIJ,
                 NULL,
                 /*59*/ MatCreateSubMatrix_MPIAIJ,
                 MatDestroy_MPIAIJ,
                 MatView_MPIAIJ,
                 NULL,
                 NULL,
                 /*64*/ NULL,
                 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
                 MatGetRowMinAbs_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 NULL,
                 /*75*/ MatFDColoringApply_AIJ,
                 MatSetFromOptions_MPIAIJ,
                 NULL,
                 NULL,
                 MatFindZeroDiagonals_MPIAIJ,
                 /*80*/ NULL,
                 NULL,
                 NULL,
                 /*83*/ MatLoad_MPIAIJ,
                 MatIsSymmetric_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 NULL,
                 /*89*/ NULL,
                 NULL,
                 MatMatMultNumeric_MPIAIJ_MPIAIJ,
                 NULL,
                 NULL,
                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 MatBindToCPU_MPIAIJ,
                 /*99*/ MatProductSetFromOptions_MPIAIJ,
                 NULL,
                 NULL,
                 MatConjugate_MPIAIJ,
                 NULL,
                 /*104*/ MatSetValuesRow_MPIAIJ,
                 MatRealPart_MPIAIJ,
                 MatImaginaryPart_MPIAIJ,
                 NULL,
                 NULL,
                 /*109*/ NULL,
                 NULL,
                 MatGetRowMin_MPIAIJ,
                 NULL,
                 MatMissingDiagonal_MPIAIJ,
                 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                 NULL,
                 MatGetGhosts_MPIAIJ,
                 NULL,
                 NULL,
                 /*119*/ MatMultDiagonalBlock_MPIAIJ,
                 NULL,
                 NULL,
                 NULL,
                 MatGetMultiProcBlock_MPIAIJ,
                 /*124*/ MatFindNonzeroRows_MPIAIJ,
                 MatGetColumnReductions_MPIAIJ,
                 MatInvertBlockDiagonal_MPIAIJ,
                 MatInvertVariableBlockDiagonal_MPIAIJ,
                 MatCreateSubMatricesMPI_MPIAIJ,
                 /*129*/ NULL,
                 NULL,
                 NULL,
                 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                 NULL,
                 /*134*/ NULL,
                 NULL,
                 NULL,
                 NULL,
                 NULL,
                 /*139*/ MatSetBlockSizes_MPIAIJ,
                 NULL,
                 NULL,
                 MatFDColoringSetUp_MPIXAIJ,
                 MatFindOffBlockDiagonalEntries_MPIAIJ,
                 MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                 /*145*/ NULL,
                 NULL,
                 NULL,
                 MatCreateGraph_Simple_AIJ,
                 NULL,
                 /*150*/ NULL,
                 MatEliminateZeros_MPIAIJ};

/* ----------------------------------------------------------------------------------------*/

/* Snapshots the numerical values of both sequential blocks (see MatStoreValues()) */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restores the values previously saved by MatStoreValues_MPIAIJ() */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocation for MATMPIAIJ: d_nz/d_nnz describe the diagonal block,
   o_nz/o_nnz the off-diagonal block (see MatMPIAIJSetPreallocation()) */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,
PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

  /* discard any previous column map / ghost data; it is rebuilt at assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* off-diagonal block starts with the full global column width (0 if uniprocessor);
     its columns are compacted when the matrix is assembled */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));

  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Resets the preallocation of both blocks in place (keeps layouts), discarding the
   column map and communication structures so the matrix can be refilled from scratch */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicates a MATMPIAIJ matrix: copies layouts, flags, the column map and ghost
   index array, then duplicates both sequential blocks (values per cpvalues) */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow() scratch is not shared */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray =
NULL;

  /* It may happen MatDuplicate is called with a non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); }
  if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads a MATMPIAIJ matrix from a viewer, dispatching on viewer type
   (PETSc binary or HDF5 when available) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Reads a matrix in PETSc binary format: header (classid, M, N, nz), per-row lengths
   (converted below into CSR row offsets), then column indices and values, finally
   handed to MatMPIAIJSetPreallocationCSR() */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: the global sum of row lengths must equal the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz,
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
/* Converts a parallel column IS into a sequential IS holding all selected columns.
   If every rank's iscol is exactly its own column ownership range (checked via a
   collective MIN reduction), the gather is skipped and an identity stride IS of
   size N is returned instead. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    /* mlen is the END of this rank's column range, so mlen - mstart is the local column count */
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree before taking the all-columns shortcut */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameters:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol;
            caller takes ownership and must PetscFree() it
 */
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.'
to form a full vector x; entries > -1 later mark selected columns */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum of local iscol sizes */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d (local column indices for mat->A); IS takes ownership of idx */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d (local row indices) */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: ghost entries left at -1 were not selected */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership transferred to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");
    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M; Asub/Bsub are consumed by this call */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* both subgarray and garray are sorted; advance j to match each surviving column */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Top-level submatrix extraction for MATMPIAIJ. Dispatches to a specialized routine
   when the row (and possibly column) index sets follow the matrix's own processor
   distribution, otherwise falls back to the nonscalable all-gather path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* on reuse, the ISes composed on the previous call tell us which path was taken */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] =
PETSC_TRUE; 3387 } else { 3388 PetscCall(ISGetMinMax(iscol, &i, &j)); 3389 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3390 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3391 } 3392 3393 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3394 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3395 sameRowDist = tsameDist[0]; 3396 } 3397 3398 if (sameRowDist) { 3399 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3400 /* isrow and iscol have same processor distribution as mat */ 3401 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3402 PetscFunctionReturn(0); 3403 } else { /* sameRowDist */ 3404 /* isrow has same processor distribution as mat */ 3405 if (call == MAT_INITIAL_MATRIX) { 3406 PetscBool sorted; 3407 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3408 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3409 PetscCall(ISGetSize(iscol, &i)); 3410 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3411 3412 PetscCall(ISSorted(iscol_local, &sorted)); 3413 if (sorted) { 3414 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3415 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3416 PetscFunctionReturn(0); 3417 } 3418 } else { /* call == MAT_REUSE_MATRIX */ 3419 IS iscol_sub; 3420 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3421 if (iscol_sub) { 3422 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3423 PetscFunctionReturn(0); 3424 } 3425 } 3426 } 3427 } 3428 3429 /* General case: iscol -> iscol_local which has global size of iscol */ 3430 if (call == MAT_REUSE_MATRIX) { 3431 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", 
(PetscObject *)&iscol_local)); 3432 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3433 } else { 3434 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3435 } 3436 3437 PetscCall(ISGetLocalSize(iscol, &csize)); 3438 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3439 3440 if (call == MAT_INITIAL_MATRIX) { 3441 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3442 PetscCall(ISDestroy(&iscol_local)); 3443 } 3444 PetscFunctionReturn(0); 3445 } 3446 3447 /*@C 3448 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3449 and "off-diagonal" part of the matrix in CSR format. 3450 3451 Collective 3452 3453 Input Parameters: 3454 + comm - MPI communicator 3455 . A - "diagonal" portion of matrix 3456 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3457 - garray - global index of B columns 3458 3459 Output Parameter: 3460 . mat - the matrix, with input A as its local diagonal matrix 3461 Level: advanced 3462 3463 Notes: 3464 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3465 3466 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of local diagonal-block widths across ranks */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Convert B's local column indices to global indices, in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand array ownership from B to Bnew: clear B's free flags so MatDestroy(&B)
     below does not free the shared arrays, then set them on Bnew. Order matters. */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  /* Extracts a parallel submatrix when isrow has the same row distribution as mat.
     iscol_local must be sorted (may contain duplicates); it is NULL on MAT_REUSE_MATRIX,
     in which case the helper objects composed on *newmat by the initial call are used. */
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects saved by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* Merge-style scan: both is_idx and garray are sorted, so k only moves forward */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible, first (Ncols % size) ranks get one extra */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* single allocation holds both dlens and olens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub columns to newmat columns */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* collective decision: take the allcolumns shortcut only if ALL ranks qualify */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* single allocation holds both dlens and olens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/* Implementation of MatMPIAIJSetPreallocationCSR() for MATMPIAIJ: preallocates from
   local CSR arrays (Ii,J) and inserts the values v (if given), then assembles. */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    for (i =
0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      /* these checks assume each row's column indices are sorted ascending */
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count diagonal-block vs off-diagonal-block entries per row for preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all entries are local by construction; suppress off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++; /* relies on sorted columns within the row */
    ld[i] = j;
    J += nnz; /* advance J to the next row; J is not used again after this loop */
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is 3980 as shown 3981 3982 $ 1 0 0 3983 $ 2 0 3 P0 3984 $ ------- 3985 $ 4 5 6 P1 3986 $ 3987 $ Process0 [P0]: rows_owned=[0,1] 3988 $ i = {0,1,3} [size = nrow+1 = 2+1] 3989 $ j = {0,0,2} [size = 3] 3990 $ v = {1,2,3} [size = 3] 3991 $ 3992 $ Process1 [P1]: rows_owned=[2] 3993 $ i = {0,3} [size = nrow+1 = 1+1] 3994 $ j = {0,1,2} [size = 3] 3995 $ v = {4,5,6} [size = 3] 3996 3997 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 3998 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 3999 @*/ 4000 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4001 { 4002 PetscFunctionBegin; 4003 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4004 PetscFunctionReturn(0); 4005 } 4006 4007 /*@C 4008 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4009 (the default parallel PETSc format). For good matrix assembly performance 4010 the user should preallocate the matrix storage by setting the parameters 4011 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4012 performance can be increased by more than a factor of 50. 4013 4014 Collective 4015 4016 Input Parameters: 4017 + B - the matrix 4018 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4019 (same value is used for all local rows) 4020 . d_nnz - array containing the number of nonzeros in the various rows of the 4021 DIAGONAL portion of the local submatrix (possibly different for each row) 4022 or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure. 4023 The size of this array is equal to the number of local rows, i.e 'm'. 
   For matrices that will be factored, you must leave room for (and set)
   the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See [Sparse Matrices](sec_matsparse) for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as `MATSEQAIJ` matrices. E.g., proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another `MATSEQAIJ` matrix.
   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence pre-allocation is perfect.
  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation; silently a no-op for types
     that do not provide "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering. i.e for the following matrix, the input data expected is
   as shown

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL (process owns no rows); otherwise its first offset must be 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied into the matrix; the caller's arrays are not referenced afterwards */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
   CSR format for the local rows.
Only the numerical values are updated; the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()`

   Deprecated: Use `MatUpdateMPIAIJWithArray()`

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  /* NOTE(review): M, N and J are accepted for interface symmetry with MatCreateMPIAIJWithArrays()
     but are not read in this body; only m, n, Ii and v influence the update */
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* row offsets of the diagonal block */
  PetscInt       *ld  = Aij->ld; /* per the copy pattern below: number of entries of row i that belong to
                                    the off-diagonal block and precede the diagonal block */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* each CSR row of v is split as [left off-diagonal | diagonal block | right off-diagonal];
       assumes the column ordering used when the matrix was created is unchanged */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i]; /* entries in the diagonal block of this row */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* values were written in place, so assembly needs no communication */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values

   Collective

   Input Parameters:
+  mat - the matrix
-  v - matrix values, stored by row

   Level: intermediate

   Note:
   The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* note: despite its name, Adj holds the row offsets of the OFF-diagonal block (Ao->i) */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* per the copy pattern below: off-diagonal entries of row i preceding the diagonal block */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset into v, rebuilding the original CSR row starts from the stored structure */
  for (i = 0; i < m; i++) {
    /* total entries of row i = diagonal-block entries + off-diagonal entries */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    /* input row is laid out as [left off-diagonal | diagonal block | right off-diagonal] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
    Iii += nnz;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE; /* values were written in place, so assembly needs no communication */
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
   (the default parallel PETSc format). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.
4345 4346 Collective 4347 4348 Input Parameters: 4349 + comm - MPI communicator 4350 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4351 This value should be the same as the local size used in creating the 4352 y vector for the matrix-vector product y = Ax. 4353 . n - This value should be the same as the local size used in creating the 4354 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4355 calculated if N is given) For square matrices n is almost always m. 4356 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4357 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4358 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4359 (same value is used for all local rows) 4360 . d_nnz - array containing the number of nonzeros in the various rows of the 4361 DIAGONAL portion of the local submatrix (possibly different for each row) 4362 or NULL, if d_nz is used to specify the nonzero structure. 4363 The size of this array is equal to the number of local rows, i.e 'm'. 4364 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4365 submatrix (same value is used for all local rows). 4366 - o_nnz - array containing the number of nonzeros in the various rows of the 4367 OFF-DIAGONAL portion of the local submatrix (possibly different for 4368 each row) or NULL, if o_nz is used to specify the nonzero 4369 structure. The size of this array is equal to the number 4370 of local rows, i.e 'm'. 4371 4372 Output Parameter: 4373 . A - the matrix 4374 4375 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4376 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
[MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).
4415 4416 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4417 4418 When calling this routine with a single process communicator, a matrix of 4419 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4420 type of communicator, use the construction mechanism 4421 .vb 4422 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4423 .ve 4424 4425 $ MatCreate(...,&A); 4426 $ MatSetType(A,MATMPIAIJ); 4427 $ MatSetSizes(A, m,n,M,N); 4428 $ MatMPIAIJSetPreallocation(A,...); 4429 4430 By default, this format uses inodes (identical nodes) when possible. 4431 We search for consecutive rows with the same nonzero structure, thereby 4432 reusing matrix information to achieve increased efficiency. 4433 4434 Options Database Keys: 4435 + -mat_no_inode - Do not use inodes 4436 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4437 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4438 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4439 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4440 4441 Example usage: 4442 4443 Consider the following 8x8 matrix with 34 non-zero values, that is 4444 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4445 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4497 34 values. 4498 4499 When d_nnz, o_nnz parameters are specified, the storage is specified 4500 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4501 In the above case the values for d_nnz,o_nnz are 4502 .vb 4503 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4504 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4505 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4506 .ve 4507 Here the space allocated is sum of all the above values i.e 34, and 4508 hence pre-allocation is perfect. 4509 4510 Level: intermediate 4511 4512 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4513 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4514 @*/ 4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4516 { 4517 PetscMPIInt size; 4518 4519 PetscFunctionBegin; 4520 PetscCall(MatCreate(comm, A)); 4521 PetscCall(MatSetSizes(*A, m, n, M, N)); 4522 PetscCallMPI(MPI_Comm_size(comm, &size)); 4523 if (size > 1) { 4524 PetscCall(MatSetType(*A, MATMPIAIJ)); 4525 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4526 } else { 4527 PetscCall(MatSetType(*A, MATSEQAIJ)); 4528 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4529 } 4530 PetscFunctionReturn(0); 4531 } 4532 4533 /*@C 4534 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4535 4536 Not collective 4537 4538 Input Parameter: 4539 . A - The `MATMPIAIJ` matrix 4540 4541 Output Parameters: 4542 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4543 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note:
   The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.

   Level: intermediate

.seealso: `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* prefix match rather than exact match, so subtypes whose names begin with "mpiaij" are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;      /* diagonal block */
  if (Ao) *Ao = a->B;      /* off-diagonal block */
  if (colmap) *colmap = a->garray; /* local-to-global column map for the off-diagonal block */
  PetscFunctionReturn(0);
}

/* Concatenates the rows of each process's sequential matrix inmat into one parallel matrix *outmat on comm.
   n is the local column size of the result (or PETSC_DECIDE). With MAT_INITIAL_MATRIX the structure is
   determined and the matrix created; otherwise only the numerical values are re-inserted. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* global index of this process's first row in the concatenated matrix */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are issued because the resulting type may be sequential or parallel */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* Writes each process's local rows of A, as a sequential matrix, to the binary file <outfile>.<rank> */
PetscErrorCode MatFileSplit(Mat A, char *outfile)
{
  PetscMPIInt        rank;
  PetscInt           m, N, i, rstart, nnz;
  size_t             len;
  const PetscInt    *indx;
  PetscViewer        out;
  char              *name;
  Mat                B;
  const PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCall(MatGetSize(A, NULL, &N));
  /* Should this be the type of the diagonal block of A? */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, m, N, m, N));
  PetscCall(MatSetBlockSizesFromMats(B, A, A));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values));
    PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
  PetscCall(PetscStrlen(outfile, &len));
  /* len + 6 leaves room for '.', up to 4 rank digits and the NUL terminator;
     NOTE(review): ranks >= 10000 would be truncated by PetscSNPrintf() -- confirm this is acceptable */
  PetscCall(PetscMalloc1(len + 6, &name));
  PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank));
  PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out));
  PetscCall(PetscFree(name));
  PetscCall(MatView(B, out));
  PetscCall(PetscViewerDestroy(&out));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(0);
}

/* Frees the Mat_Merge_SeqsToMPI scratch data used by the seqs-to-MPI merge routines;
   presumably installed as a PetscContainer destroy callback -- confirm at the composition site */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(0);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  PetscCall(PetscFree(merge->buf_ri[0])); /* buf_ri/buf_rj each own one contiguous allocation pointed to by element 0 */
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(0);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of summing per-process sequential AIJ matrices into one parallel MPIAIJ matrix:
   fills mpimat (whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic()) with the summed
   values of each process's seqmat, exchanging off-process row values with point-to-point MPI */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge scratch data stashed on mpimat by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* values this process contributes to rows owned by proc start at aa + ai[owners[proc]] */
    i = owners[proc];
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  /*----------------------------*/
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index */
    bj_i = bj + bi[i];       /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba;
       the merge assumes both column lists are sorted, scanning bj_i while advancing through aj on matches */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  /*---------------------------------------------------------*/
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4843 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4844 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4845 PetscCall(PetscMalloc1(size, &len_si)); 4846 PetscCall(PetscMalloc1(size, &merge->len_s)); 4847 4848 m = merge->rowmap->n; 4849 owners = merge->rowmap->range; 4850 4851 /* determine the number of messages to send, their lengths */ 4852 /*---------------------------------------------------------*/ 4853 len_s = merge->len_s; 4854 4855 len = 0; /* length of buf_si[] */ 4856 merge->nsend = 0; 4857 for (proc = 0; proc < size; proc++) { 4858 len_si[proc] = 0; 4859 if (proc == rank) { 4860 len_s[proc] = 0; 4861 } else { 4862 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4863 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4864 } 4865 if (len_s[proc]) { 4866 merge->nsend++; 4867 nrows = 0; 4868 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4869 if (ai[i + 1] > ai[i]) nrows++; 4870 } 4871 len_si[proc] = 2 * (nrows + 1); 4872 len += len_si[proc]; 4873 } 4874 } 4875 4876 /* determine the number and length of messages to receive for ij-structure */ 4877 /*-------------------------------------------------------------------------*/ 4878 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4879 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4880 4881 /* post the Irecv of j-structure */ 4882 /*-------------------------------*/ 4883 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4884 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4885 4886 /* post the Isend of j-structure */ 4887 /*--------------------------------*/ 4888 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4889 4890 for (proc = 0, k = 0; proc < size; proc++) { 4891 if (!len_s[proc]) continue; 4892 i = owners[proc]; 4893 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4894 k++; 4895 } 4896 4897 /* receives and sends of j-structure are complete */ 4898 /*------------------------------------------------*/ 4899 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4900 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4901 4902 /* send and recv i-structure */ 4903 /*---------------------------*/ 4904 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4905 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4906 4907 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4908 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4909 for (proc = 0, k = 0; proc < size; proc++) { 4910 if (!len_s[proc]) continue; 4911 /* form outgoing message for i-structure: 4912 buf_si[0]: nrows to be sent 4913 [1:nrows]: row index (global) 4914 [nrows+1:2*nrows+1]: i-structure index 4915 */ 4916 /*-------------------------------------------*/ 4917 nrows = len_si[proc] / 2 - 1; 4918 buf_si_i = buf_si + nrows + 1; 4919 buf_si[0] = nrows; 4920 buf_si_i[0] = 0; 4921 nrows = 0; 4922 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4923 anzi = ai[i + 1] - ai[i]; 4924 if (anzi) { 4925 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4926 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4927 nrows++; 4928 } 4929 } 4930 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4931 k++; 4932 buf_si += len_si[proc]; 4933 } 4934 4935 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4936 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4937 4938 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4939 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); 4940 4941 PetscCall(PetscFree(len_si)); 4942 PetscCall(PetscFree(len_ri)); 4943 PetscCall(PetscFree(rj_waits)); 4944 PetscCall(PetscFree2(si_waits, sj_waits)); 4945 PetscCall(PetscFree(ri_waits)); 4946 PetscCall(PetscFree(buf_s)); 4947 PetscCall(PetscFree(status)); 4948 4949 /* compute a local seq matrix in each processor */ 4950 /*----------------------------------------------*/ 4951 /* allocate bi array and free space for accumulating nonzero column info */ 4952 PetscCall(PetscMalloc1(m + 1, &bi)); 4953 bi[0] = 0; 4954 4955 /* create and initialize a linked list */ 4956 nlnk = N + 1; 4957 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4958 4959 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4960 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4961 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4962 4963 current_space = free_space; 4964 4965 /* determine symbolic info for each local row */ 4966 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4967 4968 for (k = 0; k < merge->nrecv; k++) { 4969 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4970 nrows = *buf_ri_k[k]; 4971 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4972 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4973 } 4974 4975 MatPreallocateBegin(comm, m, n, dnz, onz); 4976 len = 0; 4977 for (i = 0; i < m; i++) { 4978 bnzi = 0; 4979 /* add local non-zero cols of this proc's seqmat into lnk */ 4980 arow = owners[rank] + i; 4981 anzi = ai[arow + 1] - ai[arow]; 4982 aj = a->j + ai[arow]; 4983 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4984 bnzi += nlnk; 4985 /* add received col data into lnk */ 4986 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4987 if (i == *nextrow[k]) { /* i-th row */ 4988 anzi = *(nextai[k] + 1) - *nextai[k]; 4989 aj = buf_rj[k] + 
*nextai[k]; 4990 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4991 bnzi += nlnk; 4992 nextrow[k]++; 4993 nextai[k]++; 4994 } 4995 } 4996 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4997 4998 /* if free space is not available, make more free space */ 4999 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5000 /* copy data into free space, then initialize lnk */ 5001 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5002 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5003 5004 current_space->array += bnzi; 5005 current_space->local_used += bnzi; 5006 current_space->local_remaining -= bnzi; 5007 5008 bi[i + 1] = bi[i] + bnzi; 5009 } 5010 5011 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5012 5013 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5014 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5015 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5016 5017 /* create symbolic parallel matrix B_mpi */ 5018 /*---------------------------------------*/ 5019 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5020 PetscCall(MatCreate(comm, &B_mpi)); 5021 if (n == PETSC_DECIDE) { 5022 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5023 } else { 5024 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5025 } 5026 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5027 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5028 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5029 MatPreallocateEnd(dnz, onz); 5030 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5031 5032 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5033 B_mpi->assembled = PETSC_FALSE; 5034 merge->bi = bi; 5035 merge->bj = bj; 5036 merge->buf_ri = buf_ri; 5037 merge->buf_rj = buf_rj; 5038 merge->coi = NULL; 5039 merge->coj = NULL; 5040 merge->owners_co = 
NULL; 5041 5042 PetscCall(PetscCommDestroy(&comm)); 5043 5044 /* attach the supporting struct to B_mpi for reuse */ 5045 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5046 PetscCall(PetscContainerSetPointer(container, merge)); 5047 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5048 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5049 PetscCall(PetscContainerDestroy(&container)); 5050 *mpimat = B_mpi; 5051 5052 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5053 PetscFunctionReturn(0); 5054 } 5055 5056 /*@C 5057 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5058 matrices from each processor 5059 5060 Collective 5061 5062 Input Parameters: 5063 + comm - the communicators the parallel matrix will live on 5064 . seqmat - the input sequential matrices 5065 . m - number of local rows (or `PETSC_DECIDE`) 5066 . n - number of local columns (or `PETSC_DECIDE`) 5067 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5068 5069 Output Parameter: 5070 . mpimat - the parallel matrix generated 5071 5072 Level: advanced 5073 5074 Note: 5075 The dimensions of the sequential matrix in each processor MUST be the same. 5076 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5077 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5078 @*/ 5079 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5080 { 5081 PetscMPIInt size; 5082 5083 PetscFunctionBegin; 5084 PetscCallMPI(MPI_Comm_size(comm, &size)); 5085 if (size == 1) { 5086 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5087 if (scall == MAT_INITIAL_MATRIX) { 5088 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5089 } else { 5090 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5091 } 5092 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5093 PetscFunctionReturn(0); 5094 } 5095 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5096 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5097 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5098 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5099 PetscFunctionReturn(0); 5100 } 5101 5102 /*@ 5103 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5104 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5105 with `MatGetSize()` 5106 5107 Not Collective 5108 5109 Input Parameters: 5110 + A - the matrix 5111 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5112 5113 Output Parameter: 5114 . A_loc - the local sequential matrix generated 5115 5116 Level: developer 5117 5118 Notes: 5119 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5120 5121 Destroy the matrix with `MatDestroy()` 5122 5123 .seealso: `MatMPIAIJGetLocalMat()` 5124 @*/ 5125 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5126 { 5127 PetscBool mpi; 5128 5129 PetscFunctionBegin; 5130 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5131 if (mpi) { 5132 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5133 } else { 5134 *A_loc = A; 5135 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5136 } 5137 PetscFunctionReturn(0); 5138 } 5139 5140 /*@ 5141 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5142 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5143 with `MatGetSize()` 5144 5145 Not Collective 5146 5147 Input Parameters: 5148 + A - the matrix 5149 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5150 5151 Output Parameter: 5152 . A_loc - the local sequential matrix generated 5153 5154 Level: developer 5155 5156 Notes: 5157 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5158 5159 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5160 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5161 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5162 modify the values of the returned A_loc. 

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray maps off-diagonal local columns to global columns */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Single process: the diagonal block IS the whole matrix; return (a reference to) it */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  /* CSR structure of the diagonal (a) and off-diagonal (b) blocks */
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are advancing cursors over the value arrays; aav/bav keep the original pointers for the restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers of the merged matrix: each row holds its diagonal and off-diagonal entries */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* Interleave per row: off-diagonal entries left of the diagonal block, then the diagonal block,
       then the remaining off-diagonal entries — producing globally ascending column indices
       (assumes garray/bj are sorted, as they are for assembled MPIAIJ) */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global column < cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local column to global with cstart) */
      for (j = 0; j < ncols_d; j++) {
        cj[k] = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (global column >= cend) */
      for (j = jo; j < ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Structure already exists: only the values are rewritten, in the same interleaved order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into
a sequential matrix with 5273 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5274 5275 Not Collective 5276 5277 Input Parameters: 5278 + A - the matrix 5279 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5280 5281 Output Parameters: 5282 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5283 - A_loc - the local sequential matrix generated 5284 5285 Level: developer 5286 5287 Note: 5288 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5289 5290 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5291 @*/ 5292 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5293 { 5294 Mat Ao, Ad; 5295 const PetscInt *cmap; 5296 PetscMPIInt size; 5297 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5298 5299 PetscFunctionBegin; 5300 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5301 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5302 if (size == 1) { 5303 if (scall == MAT_INITIAL_MATRIX) { 5304 PetscCall(PetscObjectReference((PetscObject)Ad)); 5305 *A_loc = Ad; 5306 } else if (scall == MAT_REUSE_MATRIX) { 5307 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5308 } 5309 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5310 PetscFunctionReturn(0); 5311 } 5312 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5313 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5314 if (f) { 5315 PetscCall((*f)(A, scall, glob, A_loc)); 5316 } else { 5317 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5318 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5319 Mat_SeqAIJ 
*c; 5320 PetscInt *ai = a->i, *aj = a->j; 5321 PetscInt *bi = b->i, *bj = b->j; 5322 PetscInt *ci, *cj; 5323 const PetscScalar *aa, *ba; 5324 PetscScalar *ca; 5325 PetscInt i, j, am, dn, on; 5326 5327 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5328 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5329 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5330 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5331 if (scall == MAT_INITIAL_MATRIX) { 5332 PetscInt k; 5333 PetscCall(PetscMalloc1(1 + am, &ci)); 5334 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5335 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5336 ci[0] = 0; 5337 for (i = 0, k = 0; i < am; i++) { 5338 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5339 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5340 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5341 /* diagonal portion of A */ 5342 for (j = 0; j < ncols_d; j++, k++) { 5343 cj[k] = *aj++; 5344 ca[k] = *aa++; 5345 } 5346 /* off-diagonal portion of A */ 5347 for (j = 0; j < ncols_o; j++, k++) { 5348 cj[k] = dn + *bj++; 5349 ca[k] = *ba++; 5350 } 5351 } 5352 /* put together the new matrix */ 5353 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5354 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5355 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5356 c = (Mat_SeqAIJ *)(*A_loc)->data; 5357 c->free_a = PETSC_TRUE; 5358 c->free_ij = PETSC_TRUE; 5359 c->nonew = 0; 5360 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5361 } else if (scall == MAT_REUSE_MATRIX) { 5362 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5363 for (i = 0; i < am; i++) { 5364 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5365 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5366 /* diagonal portion of A */ 5367 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5368 /* off-diagonal portion of A */ 5369 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5370 } 5371 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5372 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5373 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5374 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5375 if (glob) { 5376 PetscInt cst, *gidx; 5377 5378 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5379 PetscCall(PetscMalloc1(dn + on, &gidx)); 5380 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5381 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5382 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5383 } 5384 } 5385 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5386 PetscFunctionReturn(0); 5387 } 5388 5389 /*@C 5390 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5391 5392 Not Collective 5393 5394 Input Parameters: 5395 + A - the matrix 5396 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5397 - row, col - index sets of rows and columns to extract (or NULL) 5398 5399 Output Parameter: 5400 . 
A_loc - the local sequential matrix generated

   Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default rows: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default columns: owned columns plus the nonzero off-diagonal columns (garray),
       merged so global indices stay ascending (garray is sorted for assembled MPIAIJ) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row (diag,off-diag) counts and running offsets on the root (owner) side */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* second SF pair: one leaf per nonzero, pulling column indices and values from the owners */
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (translated in place, undone further below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* undo the in-place local->global translation of po->j; every index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(0);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreate(&hamp));
    PetscCall(PetscHMapIResize(hamp, a->B->cmap->n));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* dof > 1 collapses MAIJ component columns onto one key per node */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
   MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

   Collective on A

   Input Parameters:
+  A - the first matrix in `MATMPIAIJ` format
.  B - the second matrix in `MATMPIAIJ` format
-  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameters:
+  rowb - On input index sets of rows of B to extract (or NULL), modified on output
.  colb - On input index sets of columns of B to extract (or NULL), modified on output
-  B_seq - the sequential matrix generated

   Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* rows of B must align with columns of A for the product-like extraction to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* rows to extract: owned columns of A plus its nonzero off-diagonal columns, in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* hand the index sets back to the caller (for reuse) or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

    Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

    Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Note:
    This directly accesses information inside the VecScatter associated with the matrix-vector product
    for this matrix. This is not desirable..
5801 5802 Level: developer 5803 5804 */ 5805 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5806 { 5807 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5808 Mat_SeqAIJ *b_oth; 5809 VecScatter ctx; 5810 MPI_Comm comm; 5811 const PetscMPIInt *rprocs, *sprocs; 5812 const PetscInt *srow, *rstarts, *sstarts; 5813 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5814 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5815 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5816 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5817 PetscMPIInt size, tag, rank, nreqs; 5818 5819 PetscFunctionBegin; 5820 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5821 PetscCallMPI(MPI_Comm_size(comm, &size)); 5822 5823 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5824 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5825 } 5826 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5827 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5828 5829 if (size == 1) { 5830 startsj_s = NULL; 5831 bufa_ptr = NULL; 5832 *B_oth = NULL; 5833 PetscFunctionReturn(0); 5834 } 5835 5836 ctx = a->Mvctx; 5837 tag = ((PetscObject)ctx)->tag; 5838 5839 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5840 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5841 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5842 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5843 PetscCall(PetscMalloc1(nreqs, &reqs)); 5844 rwaits = reqs; 5845 swaits = reqs + nrecvs; 5846 5847 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5848 if (scall == MAT_INITIAL_MATRIX) { 5849 /* i-array */ 5850 /*---------*/ 5851 /* post receives */ 5852 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5853 for (i = 0; i < nrecvs; i++) { 5854 rowlen = rvalues + rstarts[i] * rbs; 5855 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5856 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5857 } 5858 5859 /* pack the outgoing message */ 5860 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5861 5862 sstartsj[0] = 0; 5863 rstartsj[0] = 0; 5864 len = 0; /* total length of j or a array to be sent */ 5865 if (nsends) { 5866 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5867 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5868 } 5869 for (i = 0; i < nsends; i++) { 5870 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5871 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5872 for (j = 0; j < nrows; j++) { 5873 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5874 for (l = 0; l < sbs; l++) { 5875 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5876 5877 rowlen[j * sbs + l] = ncols; 5878 5879 len += ncols; 5880 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5881 } 5882 k++; 5883 } 5884 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5885 5886 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5887 } 5888 /* recvs and sends of i-array are completed */ 5889 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5890 
PetscCall(PetscFree(svalues)); 5891 5892 /* allocate buffers for sending j and a arrays */ 5893 PetscCall(PetscMalloc1(len + 1, &bufj)); 5894 PetscCall(PetscMalloc1(len + 1, &bufa)); 5895 5896 /* create i-array of B_oth */ 5897 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5898 5899 b_othi[0] = 0; 5900 len = 0; /* total length of j or a array to be received */ 5901 k = 0; 5902 for (i = 0; i < nrecvs; i++) { 5903 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5904 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5905 for (j = 0; j < nrows; j++) { 5906 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5907 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5908 k++; 5909 } 5910 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5911 } 5912 PetscCall(PetscFree(rvalues)); 5913 5914 /* allocate space for j and a arrays of B_oth */ 5915 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5916 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5917 5918 /* j-array */ 5919 /*---------*/ 5920 /* post receives of j-array */ 5921 for (i = 0; i < nrecvs; i++) { 5922 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5923 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5924 } 5925 5926 /* pack the outgoing message j-array */ 5927 if (nsends) k = sstarts[0]; 5928 for (i = 0; i < nsends; i++) { 5929 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5930 bufJ = bufj + sstartsj[i]; 5931 for (j = 0; j < nrows; j++) { 5932 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5933 for (ll = 0; ll < sbs; ll++) { 5934 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5935 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5936 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5937 } 5938 } 5939 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5940 } 
5941 5942 /* recvs and sends of j-array are completed */ 5943 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5944 } else if (scall == MAT_REUSE_MATRIX) { 5945 sstartsj = *startsj_s; 5946 rstartsj = *startsj_r; 5947 bufa = *bufa_ptr; 5948 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5949 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5950 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5951 5952 /* a-array */ 5953 /*---------*/ 5954 /* post receives of a-array */ 5955 for (i = 0; i < nrecvs; i++) { 5956 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5957 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5958 } 5959 5960 /* pack the outgoing message a-array */ 5961 if (nsends) k = sstarts[0]; 5962 for (i = 0; i < nsends; i++) { 5963 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5964 bufA = bufa + sstartsj[i]; 5965 for (j = 0; j < nrows; j++) { 5966 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5967 for (ll = 0; ll < sbs; ll++) { 5968 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5969 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5970 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5971 } 5972 } 5973 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5974 } 5975 /* recvs and sends of a-array are completed */ 5976 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5977 PetscCall(PetscFree(reqs)); 5978 5979 if (scall == MAT_INITIAL_MATRIX) { 5980 /* put together the new matrix */ 5981 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5982 5983 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5984 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5985 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5986 b_oth->free_a = PETSC_TRUE; 5987 b_oth->free_ij = PETSC_TRUE; 5988 b_oth->nonew = 0; 5989 5990 PetscCall(PetscFree(bufj)); 5991 if (!startsj_s || !bufa_ptr) { 5992 PetscCall(PetscFree2(sstartsj, rstartsj)); 5993 PetscCall(PetscFree(bufa_ptr)); 5994 } else { 5995 *startsj_s = sstartsj; 5996 *startsj_r = rstartsj; 5997 *bufa_ptr = bufa; 5998 } 5999 } else if (scall == MAT_REUSE_MATRIX) { 6000 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6001 } 6002 6003 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6004 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6005 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6006 PetscFunctionReturn(0); 6007 } 6008 6009 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6010 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6011 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6012 #if defined(PETSC_HAVE_MKL_SPARSE) 6013 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6014 #endif 6015 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6016 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6017 #if defined(PETSC_HAVE_ELEMENTAL) 6018 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6019 #endif 6020 #if defined(PETSC_HAVE_SCALAPACK) 6021 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6022 #endif 6023 #if defined(PETSC_HAVE_HYPRE) 6024 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6025 #endif 6026 #if defined(PETSC_HAVE_CUDA) 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat 
*); 6028 #endif 6029 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6031 #endif 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6033 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6034 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6035 6036 /* 6037 Computes (B'*A')' since computing B*A directly is untenable 6038 6039 n p p 6040 [ ] [ ] [ ] 6041 m [ A ] * n [ B ] = m [ C ] 6042 [ ] [ ] [ ] 6043 6044 */ 6045 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6046 { 6047 Mat At, Bt, Ct; 6048 6049 PetscFunctionBegin; 6050 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6051 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6052 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6053 PetscCall(MatDestroy(&At)); 6054 PetscCall(MatDestroy(&Bt)); 6055 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6056 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6057 PetscCall(MatDestroy(&Ct)); 6058 PetscFunctionReturn(0); 6059 } 6060 6061 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6062 { 6063 PetscBool cisdense; 6064 6065 PetscFunctionBegin; 6066 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6067 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6068 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6069 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, "")); 6070 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6071 PetscCall(MatSetUp(C)); 6072 6073 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6074 PetscFunctionReturn(0); 6075 } 6076 6077 /* 
----------------------------------------------------------------*/ 6078 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6079 { 6080 Mat_Product *product = C->product; 6081 Mat A = product->A, B = product->B; 6082 6083 PetscFunctionBegin; 6084 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6085 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6086 6087 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6088 C->ops->productsymbolic = MatProductSymbolic_AB; 6089 PetscFunctionReturn(0); 6090 } 6091 6092 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6093 { 6094 Mat_Product *product = C->product; 6095 6096 PetscFunctionBegin; 6097 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6098 PetscFunctionReturn(0); 6099 } 6100 6101 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6102 6103 Input Parameters: 6104 6105 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6106 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6107 6108 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6109 6110 For Set1, j1[] contains column indices of the nonzeros. 6111 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6112 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6113 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6114 6115 Similar for Set2. 6116 6117 This routine merges the two sets of nonzeros row by row and removes repeats. 
/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

   Input Parameters:

     j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
     j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

   For Set1, j1[] contains column indices of the nonzeros.
   For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
   respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
   but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

   Similar for Set2.

   This routine merges the two sets of nonzeros row by row and removes repeats.

   Output Parameters: (memory is allocated by the caller)

     i[],j[]: the CSR of the merged matrix, which has m rows.
     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
     imap2[]: similar to imap1[], but for Set2.
   Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m;                      /* r: row index of mat; m: number of local rows */
  PetscCount t, t1, t2, b1, e1, b2, e2; /* b*/e*: cursors into j1[]/j2[]; t,t1,t2: unique-nonzero counters */

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging (classic two-pointer merge of two sorted ranges) */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Skip all repeats: jump to next unique nonzero of Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique nonzero of Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of the two loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at t */
  }
  PetscFunctionReturn(0);
}
/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

   Input Parameters:
     mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
     n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
       respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

       i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
       i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

   Output Parameters:
     j[],perm[]: the routine needs to sort j[] within each row along with perm[].
     rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
       They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
       and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

     Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
       Atot: number of entries belonging to the diagonal block.
       Annz: number of unique nonzeros belonging to the diagonal block.
       Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
         repeats (i.e., same 'i,j' pair).
       Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
         is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

     Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

     Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (entries flagged to be ignored; i[] is sorted so they are all at the front) */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  }

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row; each inner do-while consumes one run of equal column indices */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices while scanning */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row (indices were never shifted, nothing to revert) */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Reuse the counters as running offsets for the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); /* p - q = multiplicity of this unique diag nonzero */
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(0);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

   Input Parameters:
     nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
     nnz:  number of unique nonzeros in the merged matrix
     imap[nnz1]:   i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
     jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

   Output Parameter: (memory is allocated by the caller)
     jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

   Example:
     nnz1 = 4
     nnz  = 6
     imap = [1,3,4,5]
     jmap = [0,3,5,6,7]
    then,
     jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz;                /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[]; fill positions (imap[k], p] with the cumulative count after set-nonzero k */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions at or below imap[0] get jmap[0] (always 0): no set nonzeros precede them */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(0);
}
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6358 { 6359 MPI_Comm comm; 6360 PetscMPIInt rank, size; 6361 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6362 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6363 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6364 6365 PetscFunctionBegin; 6366 PetscCall(PetscFree(mpiaij->garray)); 6367 PetscCall(VecDestroy(&mpiaij->lvec)); 6368 #if defined(PETSC_USE_CTABLE) 6369 PetscCall(PetscTableDestroy(&mpiaij->colmap)); 6370 #else 6371 PetscCall(PetscFree(mpiaij->colmap)); 6372 #endif 6373 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6374 mat->assembled = PETSC_FALSE; 6375 mat->was_assembled = PETSC_FALSE; 6376 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6377 6378 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6379 PetscCallMPI(MPI_Comm_size(comm, &size)); 6380 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6381 PetscCall(PetscLayoutSetUp(mat->rmap)); 6382 PetscCall(PetscLayoutSetUp(mat->cmap)); 6383 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6384 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6385 PetscCall(MatGetLocalSize(mat, &m, &n)); 6386 PetscCall(MatGetSize(mat, &M, &N)); 6387 6388 /* ---------------------------------------------------------------------------*/ 6389 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6390 /* entries come first, then local rows, then remote rows. 
*/ 6391 /* ---------------------------------------------------------------------------*/ 6392 PetscCount n1 = coo_n, *perm1; 6393 PetscInt *i1 = coo_i, *j1 = coo_j; 6394 6395 PetscCall(PetscMalloc1(n1, &perm1)); 6396 for (k = 0; k < n1; k++) perm1[k] = k; 6397 6398 /* Manipulate indices so that entries with negative row or col indices will have smallest 6399 row indices, local entries will have greater but negative row indices, and remote entries 6400 will have positive row indices. 6401 */ 6402 for (k = 0; k < n1; k++) { 6403 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6404 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6405 else { 6406 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6407 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6408 } 6409 } 6410 6411 /* Sort by row; after that, [0,k) have ignored entires, [k,rem) have local rows and [rem,n1) have remote rows */ 6412 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6413 for (k = 0; k < n1; k++) { 6414 if (i1[k] > PETSC_MIN_INT) break; 6415 } /* Advance k to the first entry we need to take care of */ 6416 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6417 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6418 6419 /* ---------------------------------------------------------------------------*/ 6420 /* Split local rows into diag/offdiag portions */ 6421 /* ---------------------------------------------------------------------------*/ 6422 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6423 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6424 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6425 
6426 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6427 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6428 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6429 6430 /* ---------------------------------------------------------------------------*/ 6431 /* Send remote rows to their owner */ 6432 /* ---------------------------------------------------------------------------*/ 6433 /* Find which rows should be sent to which remote ranks*/ 6434 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6435 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6436 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6437 const PetscInt *ranges; 6438 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6439 6440 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6441 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6442 for (k = rem; k < n1;) { 6443 PetscMPIInt owner; 6444 PetscInt firstRow, lastRow; 6445 6446 /* Locate a row range */ 6447 firstRow = i1[k]; /* first row of this owner */ 6448 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6449 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6450 6451 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6452 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6453 6454 /* All entries in [k,p) belong to this remote owner */ 6455 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6456 PetscMPIInt *sendto2; 6457 PetscInt *nentries2; 6458 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6459 6460 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6461 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6462 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6463 PetscCall(PetscFree2(sendto, nentries2)); 6464 sendto = sendto2; 6465 nentries = nentries2; 6466 maxNsend = maxNsend2; 6467 } 6468 sendto[nsend] = owner; 6469 nentries[nsend] = p - k; 6470 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6471 nsend++; 6472 k = p; 6473 } 6474 6475 /* Build 1st SF to know offsets on remote to send data */ 6476 PetscSF sf1; 6477 PetscInt nroots = 1, nroots2 = 0; 6478 PetscInt nleaves = nsend, nleaves2 = 0; 6479 PetscInt *offsets; 6480 PetscSFNode *iremote; 6481 6482 PetscCall(PetscSFCreate(comm, &sf1)); 6483 PetscCall(PetscMalloc1(nsend, &iremote)); 6484 PetscCall(PetscMalloc1(nsend, &offsets)); 6485 for (k = 0; k < nsend; k++) { 6486 iremote[k].rank = sendto[k]; 6487 iremote[k].index = 0; 6488 nleaves2 += nentries[k]; 6489 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6490 } 6491 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6492 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6493 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6494 PetscCall(PetscSFDestroy(&sf1)); 6495 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6496 6497 /* Build 2nd SF to send remote COOs to their owner */ 6498 PetscSF sf2; 6499 nroots = nroots2; 6500 nleaves = nleaves2; 6501 PetscCall(PetscSFCreate(comm, &sf2)); 6502 
PetscCall(PetscSFSetFromOptions(sf2)); 6503 PetscCall(PetscMalloc1(nleaves, &iremote)); 6504 p = 0; 6505 for (k = 0; k < nsend; k++) { 6506 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6507 for (q = 0; q < nentries[k]; q++, p++) { 6508 iremote[p].rank = sendto[k]; 6509 iremote[p].index = offsets[k] + q; 6510 } 6511 } 6512 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6513 6514 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6515 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6516 6517 /* Send the remote COOs to their owner */ 6518 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6519 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6520 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6521 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6522 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6523 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6524 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6525 6526 PetscCall(PetscFree(offsets)); 6527 PetscCall(PetscFree2(sendto, nentries)); 6528 6529 /* ---------------------------------------------------------------*/ 6530 /* Sort received COOs by row along with the permutation array */ 6531 /* ---------------------------------------------------------------*/ 6532 for (k = 0; k < n2; k++) perm2[k] = k; 6533 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6534 6535 /* ---------------------------------------------------------------*/ 6536 /* 
Split received COOs into diag/offdiag portions */ 6537 /* ---------------------------------------------------------------*/ 6538 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6539 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6540 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6541 6542 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6543 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6544 6545 /* --------------------------------------------------------------------------*/ 6546 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6547 /* --------------------------------------------------------------------------*/ 6548 PetscInt *Ai, *Bi; 6549 PetscInt *Aj, *Bj; 6550 6551 PetscCall(PetscMalloc1(m + 1, &Ai)); 6552 PetscCall(PetscMalloc1(m + 1, &Bi)); 6553 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6554 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6555 6556 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6557 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6558 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6559 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6560 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6561 6562 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6563 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6564 6565 /* --------------------------------------------------------------------------*/ 6566 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6567 /* expect nonzeros in A/B most likely have local contributing entries */ 6568 /* --------------------------------------------------------------------------*/ 6569 PetscInt Annz = Ai[m]; 6570 PetscInt Bnnz = Bi[m]; 6571 
PetscCount *Ajmap1_new, *Bjmap1_new; 6572 6573 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6574 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6575 6576 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6577 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6578 6579 PetscCall(PetscFree(Aimap1)); 6580 PetscCall(PetscFree(Ajmap1)); 6581 PetscCall(PetscFree(Bimap1)); 6582 PetscCall(PetscFree(Bjmap1)); 6583 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6584 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6585 PetscCall(PetscFree(perm1)); 6586 PetscCall(PetscFree3(i2, j2, perm2)); 6587 6588 Ajmap1 = Ajmap1_new; 6589 Bjmap1 = Bjmap1_new; 6590 6591 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6592 if (Annz < Annz1 + Annz2) { 6593 PetscInt *Aj_new; 6594 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6595 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6596 PetscCall(PetscFree(Aj)); 6597 Aj = Aj_new; 6598 } 6599 6600 if (Bnnz < Bnnz1 + Bnnz2) { 6601 PetscInt *Bj_new; 6602 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6603 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6604 PetscCall(PetscFree(Bj)); 6605 Bj = Bj_new; 6606 } 6607 6608 /* --------------------------------------------------------------------------------*/ 6609 /* Create new submatrices for on-process and off-process coupling */ 6610 /* --------------------------------------------------------------------------------*/ 6611 PetscScalar *Aa, *Ba; 6612 MatType rtype; 6613 Mat_SeqAIJ *a, *b; 6614 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6615 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6616 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6617 if (cstart) { 6618 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6619 } 6620 PetscCall(MatDestroy(&mpiaij->A)); 6621 PetscCall(MatDestroy(&mpiaij->B)); 6622 PetscCall(MatGetRootType_Private(mat, &rtype)); 6623 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6624 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6625 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6626 6627 a = (Mat_SeqAIJ *)mpiaij->A->data; 6628 b = (Mat_SeqAIJ *)mpiaij->B->data; 6629 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6630 a->free_a = b->free_a = PETSC_TRUE; 6631 a->free_ij = b->free_ij = PETSC_TRUE; 6632 6633 /* conversion must happen AFTER multiply setup */ 6634 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6635 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6636 PetscCall(VecDestroy(&mpiaij->lvec)); 6637 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6638 6639 mpiaij->coo_n = coo_n; 6640 mpiaij->coo_sf = sf2; 6641 mpiaij->sendlen = nleaves; 6642 mpiaij->recvlen = nroots; 6643 6644 mpiaij->Annz = Annz; 6645 mpiaij->Bnnz = Bnnz; 6646 6647 mpiaij->Annz2 = Annz2; 6648 mpiaij->Bnnz2 = Bnnz2; 6649 6650 mpiaij->Atot1 = Atot1; 6651 mpiaij->Atot2 = Atot2; 6652 mpiaij->Btot1 = Btot1; 6653 mpiaij->Btot2 = Btot2; 6654 6655 mpiaij->Ajmap1 = Ajmap1; 6656 mpiaij->Aperm1 = Aperm1; 6657 6658 mpiaij->Bjmap1 = Bjmap1; 6659 mpiaij->Bperm1 = Bperm1; 6660 6661 mpiaij->Aimap2 = Aimap2; 6662 mpiaij->Ajmap2 = Ajmap2; 6663 mpiaij->Aperm2 = Aperm2; 6664 6665 mpiaij->Bimap2 = Bimap2; 6666 mpiaij->Bjmap2 = Bjmap2; 6667 mpiaij->Bperm2 = Bperm2; 6668 6669 mpiaij->Cperm1 = Cperm1; 6670 6671 /* Allocate in preallocation. 
If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* Insert or add the COO values v[] (ordered exactly as the i/j arrays passed to MatSetPreallocationCOO)
   into the diagonal (A) and off-diagonal (B) blocks, using the maps and the PetscSF built during
   preallocation. Off-process entries are sent to their owning rank, and the communication is
   overlapped with the local accumulation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B; /* diagonal and off-diagonal blocks */
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  /* jmap1/perm1: local entries; jmap2/imap2/perm2: entries received from other ranks */
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1; /* permutation selecting the v[] entries destined for remote ranks */

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; /* INSERT overwrites; ADD accumulates on top of the existing value */
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; remote contributions are always added on top of the
     locally computed values above, so imode does not apply here */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Constructor for the MATMPIAIJ type: allocates the type-specific data, installs the operation
   table, and registers the string-keyed implementations (conversions, preallocation, COO
   assembly, products) queried elsewhere via PetscObjectQueryFunction(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* register composed-function implementations; conversions guarded by #if are only
     available when PETSc was configured with the corresponding package */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The i and j indices are 0 based

   See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
   communication if it is known that only local entries will be set.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* validate the CSR inputs before creating anything */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* preallocation is implied: the user-provided arrays ARE the storage (not copied) */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the user arrays as the diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* only local rows may be set here, so assembly needs no communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Backend data attached to C->product for MatProductSymbolic/Numeric_MPIAIJBACKEND */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND data hung off C->product->data.
   Note the SF-allocated buffers (coo_v, coo_w) must be released with PetscSFFree
   BEFORE the SF itself is destroyed. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the flat index storage the per-product pointers own[i]/off[i] point into */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[].
   Dispatches to a type-specific implementation (e.g. a device one) when composed on A. */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    /* host fallback: gather (or bulk-copy) from the raw value array */
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend matrix product: recompute each intermediate product,
   gather their values (split into on-process and off-process parts), and insert them
   into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o; /* running offsets into coo_v (on-process) and coo_w (off-process) */

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* only the first numeric call after symbolic may reuse symbolic's values */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s",
MatProductTypes[mmdata->mp[i]->product->type]); 6999 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7000 } 7001 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7002 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7003 7004 if (mmdata->mptmp[i]) continue; 7005 if (noff) { 7006 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7007 7008 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7009 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7010 n_o += noff; 7011 n_d += nown; 7012 } else { 7013 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7014 7015 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7016 n_d += mm->nz; 7017 } 7018 } 7019 if (mmdata->hasoffproc) { /* offprocess insertion */ 7020 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7021 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7022 } 7023 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7024 PetscFunctionReturn(0); 7025 } 7026 7027 /* Support for Pt * A, A * P, or Pt * A * P */ 7028 #define MAX_NUMBER_INTERMEDIATE 4 7029 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7030 { 7031 Mat_Product *product = C->product; 7032 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7033 Mat_MPIAIJ *a, *p; 7034 MatMatMPIAIJBACKEND *mmdata; 7035 ISLocalToGlobalMapping P_oth_l2g = NULL; 7036 IS glob = NULL; 7037 const char *prefix; 7038 char pprefix[256]; 7039 const PetscInt *globidx, *P_oth_idx; 7040 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7041 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7042 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7043 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7044 /* a base offset; type-2: sparse with a local to global map table */ 7045 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7046 7047 MatProductType ptype; 7048 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk; 7049 PetscMPIInt size; 7050 7051 PetscFunctionBegin; 7052 MatCheckProduct(C, 1); 7053 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7054 ptype = product->type; 7055 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7056 ptype = MATPRODUCT_AB; 7057 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7058 } 7059 switch (ptype) { 7060 case MATPRODUCT_AB: 7061 A = product->A; 7062 P = product->B; 7063 m = A->rmap->n; 7064 n = P->cmap->n; 7065 M = A->rmap->N; 7066 N = P->cmap->N; 7067 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7068 break; 7069 case MATPRODUCT_AtB: 7070 P = product->A; 7071 A = product->B; 7072 m = P->cmap->n; 7073 n = A->cmap->n; 7074 M = P->cmap->N; 7075 N = A->cmap->N; 7076 hasoffproc = PETSC_TRUE; 7077 break; 7078 case MATPRODUCT_PtAP: 7079 A = product->A; 7080 P = product->B; 7081 m = P->cmap->n; 7082 n = P->cmap->n; 7083 M = P->cmap->N; 7084 N = P->cmap->N; 7085 hasoffproc = PETSC_TRUE; 7086 break; 7087 default: 7088 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7089 } 7090 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7091 if (size == 1) hasoffproc = PETSC_FALSE; 7092 7093 /* defaults */ 7094 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7095 mp[i] = NULL; 7096 mptmp[i] = PETSC_FALSE; 7097 rmapt[i] = -1; 7098 cmapt[i] = -1; 7099 rmapa[i] = NULL; 7100 cmapa[i] = NULL; 7101 } 7102 7103 /* customization */ 7104 
PetscCall(PetscNew(&mmdata)); 7105 mmdata->reusesym = product->api_user; 7106 if (ptype == MATPRODUCT_AB) { 7107 if (product->api_user) { 7108 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7109 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7110 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7111 PetscOptionsEnd(); 7112 } else { 7113 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7114 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7115 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7116 PetscOptionsEnd(); 7117 } 7118 } else if (ptype == MATPRODUCT_PtAP) { 7119 if (product->api_user) { 7120 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7121 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7122 PetscOptionsEnd(); 7123 } else { 7124 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7125 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7126 PetscOptionsEnd(); 7127 } 7128 } 7129 a = (Mat_MPIAIJ *)A->data; 7130 p = (Mat_MPIAIJ *)P->data; 7131 PetscCall(MatSetSizes(C, m, n, M, N)); 7132 PetscCall(PetscLayoutSetUp(C->rmap)); 7133 PetscCall(PetscLayoutSetUp(C->cmap)); 7134 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7135 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7136 7137 cp = 0; 7138 switch (ptype) { 7139 case MATPRODUCT_AB: /* A * P */ 7140 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7141 7142 /* A_diag * P_local (merged or not) */ 7143 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7144 /* P is product->B */ 7145 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7146 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7147 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7148 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7149 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7150 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7151 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7152 mp[cp]->product->api_user = product->api_user; 7153 PetscCall(MatProductSetFromOptions(mp[cp])); 7154 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7155 PetscCall(ISGetIndices(glob, &globidx)); 7156 rmapt[cp] = 1; 7157 cmapt[cp] = 2; 7158 cmapa[cp] = globidx; 7159 mptmp[cp] = PETSC_FALSE; 7160 cp++; 7161 } else { /* A_diag * P_diag and A_diag * P_off */ 7162 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7163 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7164 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7165 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7166 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7167 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7168 mp[cp]->product->api_user = product->api_user; 7169 PetscCall(MatProductSetFromOptions(mp[cp])); 7170 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7171 rmapt[cp] = 1; 7172 cmapt[cp] = 1; 7173 mptmp[cp] = PETSC_FALSE; 7174 cp++; 7175 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7176 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7177 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7178 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7179 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7180 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7181 mp[cp]->product->api_user = product->api_user; 7182 PetscCall(MatProductSetFromOptions(mp[cp])); 7183 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7184 rmapt[cp] = 1; 7185 cmapt[cp] = 2; 7186 cmapa[cp] = p->garray; 7187 mptmp[cp] = PETSC_FALSE; 7188 cp++; 7189 } 7190 7191 /* A_off * P_other */ 7192 if (mmdata->P_oth) { 7193 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7194 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7195 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7196 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7197 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7198 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7199 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7200 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7201 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7202 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7203 mp[cp]->product->api_user = product->api_user; 7204 PetscCall(MatProductSetFromOptions(mp[cp])); 7205 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7206 rmapt[cp] = 1; 7207 cmapt[cp] = 2; 7208 cmapa[cp] = P_oth_idx; 7209 mptmp[cp] = PETSC_FALSE; 7210 cp++; 7211 } 7212 break; 7213 7214 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7215 /* A is product->B */ 7216 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7217 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7218 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7219 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7220 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7221 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7222 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7223 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7224 mp[cp]->product->api_user = product->api_user; 7225 PetscCall(MatProductSetFromOptions(mp[cp])); 7226 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7227 PetscCall(ISGetIndices(glob, &globidx)); 7228 rmapt[cp] = 2; 7229 rmapa[cp] = globidx; 7230 cmapt[cp] = 2; 7231 cmapa[cp] = globidx; 7232 mptmp[cp] = PETSC_FALSE; 7233 cp++; 7234 } else { 7235 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7236 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7237 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7238 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7239 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7240 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7241 mp[cp]->product->api_user = product->api_user; 7242 PetscCall(MatProductSetFromOptions(mp[cp])); 7243 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7244 PetscCall(ISGetIndices(glob, &globidx)); 7245 rmapt[cp] = 1; 7246 cmapt[cp] = 2; 7247 cmapa[cp] = globidx; 7248 mptmp[cp] = PETSC_FALSE; 7249 cp++; 7250 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7251 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7252 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7253 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7254 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7255 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7256 mp[cp]->product->api_user = product->api_user; 7257 PetscCall(MatProductSetFromOptions(mp[cp])); 7258 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7259 rmapt[cp] = 2; 7260 rmapa[cp] = p->garray; 
7261 cmapt[cp] = 2; 7262 cmapa[cp] = globidx; 7263 mptmp[cp] = PETSC_FALSE; 7264 cp++; 7265 } 7266 break; 7267 case MATPRODUCT_PtAP: 7268 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7269 /* P is product->B */ 7270 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7271 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7272 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7273 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7274 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7275 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7276 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7277 mp[cp]->product->api_user = product->api_user; 7278 PetscCall(MatProductSetFromOptions(mp[cp])); 7279 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7280 PetscCall(ISGetIndices(glob, &globidx)); 7281 rmapt[cp] = 2; 7282 rmapa[cp] = globidx; 7283 cmapt[cp] = 2; 7284 cmapa[cp] = globidx; 7285 mptmp[cp] = PETSC_FALSE; 7286 cp++; 7287 if (mmdata->P_oth) { 7288 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7289 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7290 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7291 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7292 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7293 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7294 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7295 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7296 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7297 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7298 mp[cp]->product->api_user = product->api_user; 7299 PetscCall(MatProductSetFromOptions(mp[cp])); 7300 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7301 
mptmp[cp] = PETSC_TRUE; 7302 cp++; 7303 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7304 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7305 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7306 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7307 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7308 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7309 mp[cp]->product->api_user = product->api_user; 7310 PetscCall(MatProductSetFromOptions(mp[cp])); 7311 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7312 rmapt[cp] = 2; 7313 rmapa[cp] = globidx; 7314 cmapt[cp] = 2; 7315 cmapa[cp] = P_oth_idx; 7316 mptmp[cp] = PETSC_FALSE; 7317 cp++; 7318 } 7319 break; 7320 default: 7321 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7322 } 7323 /* sanity check */ 7324 if (size > 1) 7325 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7326 7327 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7328 for (i = 0; i < cp; i++) { 7329 mmdata->mp[i] = mp[i]; 7330 mmdata->mptmp[i] = mptmp[i]; 7331 } 7332 mmdata->cp = cp; 7333 C->product->data = mmdata; 7334 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7335 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7336 7337 /* memory type */ 7338 mmdata->mtype = PETSC_MEMTYPE_HOST; 7339 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7340 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7341 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7342 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7343 7344 /* prepare coo coordinates for values insertion */ 7345 7346 /* count total nonzeros of those intermediate seqaij Mats 7347 ncoo_d: # of nonzeros of matrices that 
do not have offproc entries 7348 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7349 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7350 */ 7351 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7352 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7353 if (mptmp[cp]) continue; 7354 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7355 const PetscInt *rmap = rmapa[cp]; 7356 const PetscInt mr = mp[cp]->rmap->n; 7357 const PetscInt rs = C->rmap->rstart; 7358 const PetscInt re = C->rmap->rend; 7359 const PetscInt *ii = mm->i; 7360 for (i = 0; i < mr; i++) { 7361 const PetscInt gr = rmap[i]; 7362 const PetscInt nz = ii[i + 1] - ii[i]; 7363 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7364 else ncoo_oown += nz; /* this row is local */ 7365 } 7366 } else ncoo_d += mm->nz; 7367 } 7368 7369 /* 7370 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7371 7372 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7373 7374 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7375 7376 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7377 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7378 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7379 7380 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7381 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7382 */ 7383 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7384 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7385 7386 /* gather (i,j) of nonzeros inserted by remote procs */ 7387 if (hasoffproc) { 7388 PetscSF msf; 7389 PetscInt ncoo2, *coo_i2, *coo_j2; 7390 7391 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7392 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7393 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7394 7395 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7396 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7397 PetscInt *idxoff = mmdata->off[cp]; 7398 PetscInt *idxown = mmdata->own[cp]; 7399 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7400 const PetscInt *rmap = rmapa[cp]; 7401 const PetscInt *cmap = cmapa[cp]; 7402 const PetscInt *ii = mm->i; 7403 PetscInt *coi = coo_i + ncoo_o; 7404 PetscInt *coj = coo_j + ncoo_o; 7405 const PetscInt mr = mp[cp]->rmap->n; 7406 const PetscInt rs = C->rmap->rstart; 7407 const PetscInt re = C->rmap->rend; 7408 const PetscInt cs = C->cmap->rstart; 7409 for (i = 0; i < mr; i++) { 7410 const PetscInt *jj = mm->j + ii[i]; 7411 const PetscInt gr = rmap[i]; 7412 const PetscInt nz = ii[i + 1] - ii[i]; 7413 if (gr < rs || gr >= re) { /* this is an offproc row */ 7414 for (j = ii[i]; j < ii[i + 1]; j++) { 7415 *coi++ = gr; 7416 *idxoff++ = j; 7417 } 7418 if (!cmapt[cp]) { /* already global */ 7419 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7420 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7421 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7422 } else { /* offdiag */ 7423 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7424 } 7425 ncoo_o += nz; 7426 } else { /* this is a local row */ 7427 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7428 } 7429 } 7430 } 7431 mmdata->off[cp + 1] = idxoff; 7432 mmdata->own[cp + 1] = idxown; 7433 } 7434 7435 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7436 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7437 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7438 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7439 ncoo = ncoo_d + ncoo_oown + ncoo2; 7440 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7441 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7442 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7443 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7444 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7445 PetscCall(PetscFree2(coo_i, coo_j)); 7446 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7447 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7448 coo_i = coo_i2; 7449 coo_j = coo_j2; 7450 } else { /* no offproc values insertion */ 7451 ncoo = ncoo_d; 7452 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7453 7454 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7455 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7456 PetscCall(PetscSFSetUp(mmdata->sf)); 7457 } 7458 mmdata->hasoffproc = hasoffproc; 7459 7460 /* gather (i,j) of nonzeros inserted locally */ 7461 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7462 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7463 PetscInt *coi = coo_i + ncoo_d; 7464 PetscInt *coj = coo_j + ncoo_d; 7465 const PetscInt *jj = mm->j; 7466 const PetscInt *ii = mm->i; 7467 const PetscInt *cmap = cmapa[cp]; 7468 const PetscInt *rmap = rmapa[cp]; 7469 const PetscInt mr = mp[cp]->rmap->n; 7470 const PetscInt rs = C->rmap->rstart; 7471 const 
PetscInt re = C->rmap->rend; 7472 const PetscInt cs = C->cmap->rstart; 7473 7474 if (mptmp[cp]) continue; 7475 if (rmapt[cp] == 1) { /* consecutive rows */ 7476 /* fill coo_i */ 7477 for (i = 0; i < mr; i++) { 7478 const PetscInt gr = i + rs; 7479 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7480 } 7481 /* fill coo_j */ 7482 if (!cmapt[cp]) { /* type-0, already global */ 7483 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7484 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7485 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7486 } else { /* type-2, local to global for sparse columns */ 7487 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7488 } 7489 ncoo_d += mm->nz; 7490 } else if (rmapt[cp] == 2) { /* sparse rows */ 7491 for (i = 0; i < mr; i++) { 7492 const PetscInt *jj = mm->j + ii[i]; 7493 const PetscInt gr = rmap[i]; 7494 const PetscInt nz = ii[i + 1] - ii[i]; 7495 if (gr >= rs && gr < re) { /* local rows */ 7496 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7497 if (!cmapt[cp]) { /* type-0, already global */ 7498 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7499 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7500 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7501 } else { /* type-2, local to global for sparse columns */ 7502 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7503 } 7504 ncoo_d += nz; 7505 } 7506 } 7507 } 7508 } 7509 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7510 PetscCall(ISDestroy(&glob)); 7511 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7512 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7513 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7514 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7515 7516 /* preallocate with COO data */ 7517 PetscCall(MatSetPreallocationCOO(C, ncoo, 
coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j)); /* COO indices were copied by MatSetPreallocationCOO() above; safe to free */
  PetscFunctionReturn(0);
}
/* NOTE(review): the lines above are the tail of the preceding symbolic-phase routine, whose start is outside this chunk */

/*
  MatProductSetFromOptions_MPIAIJBACKEND - choose the symbolic implementation for mat = A*B, A^T*B or P^T*A*P

  When device support is compiled in, the backend symbolic routine is selected only if A and B have the
  same matrix type and neither operand is bound to the CPU; the user may still force the CPU path with
  the -mat*_backend_cpu options probed below. Without device support, the backend path is always taken
  for the three supported product types. Anything else falls back to MatProductSetFromOptions_MPIAIJ().
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* set below from the A/B type comparison */
  PetscBool usecpu = PETSC_FALSE; /* user-requested CPU fallback */
#else
  PetscBool match = PETSC_TRUE; /* no device: backend path is unconditional for supported types */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* Only use the backend when both operands live on the device and share a type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* The option name depends on whether the user called the legacy API (MatMatMult() etc.)
       or the MatProduct API; both spellings control the same flag */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu; /* user vetoed the backend -> fall through to MPIAIJ ops */
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)

  Notes:
  Relies on MatGetRow() returning column indices in ascending order: consecutive indices falling in
  the same size-bs block collapse to a single entry, so cc[] ends up strictly increasing.
  For an empty row cnt stays -1 and *n is correctly reported as 0.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      /* new block index only when we cross a block boundary (indices assumed sorted) */
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

7621 ncollapsed - the number of block indices 7622 collapsed - the block indices (must be large enough to contain the indices) 7623 */ 7624 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7625 { 7626 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7627 7628 PetscFunctionBegin; 7629 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7630 for (i = start + 1; i < start + bs; i++) { 7631 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7632 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7633 cprevtmp = cprev; 7634 cprev = merged; 7635 merged = cprevtmp; 7636 } 7637 *ncollapsed = nprev; 7638 if (collapsed) *collapsed = cprev; 7639 PetscFunctionReturn(0); 7640 } 7641 7642 /* 7643 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7644 */ 7645 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7646 { 7647 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7648 Mat tGmat; 7649 MPI_Comm comm; 7650 const PetscScalar *vals; 7651 const PetscInt *idx; 7652 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7653 MatScalar *AA; // this is checked in graph 7654 PetscBool isseqaij; 7655 Mat a, b, c; 7656 MatType jtype; 7657 7658 PetscFunctionBegin; 7659 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7660 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7661 PetscCall(MatGetType(Gmat, &jtype)); 7662 PetscCall(MatCreate(comm, &tGmat)); 7663 PetscCall(MatSetType(tGmat, jtype)); 7664 7665 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7666 Also, if the matrix is symmetric, can we skip this 7667 operation? It can be very expensive on large matrices. 
*/ 7668 7669 // global sizes 7670 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7671 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7672 nloc = Iend - Istart; 7673 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7674 if (isseqaij) { 7675 a = Gmat; 7676 b = NULL; 7677 } else { 7678 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7679 a = d->A; 7680 b = d->B; 7681 garray = d->garray; 7682 } 7683 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7684 for (PetscInt row = 0; row < nloc; row++) { 7685 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7686 d_nnz[row] = ncols; 7687 if (ncols > maxcols) maxcols = ncols; 7688 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7689 } 7690 if (b) { 7691 for (PetscInt row = 0; row < nloc; row++) { 7692 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7693 o_nnz[row] = ncols; 7694 if (ncols > maxcols) maxcols = ncols; 7695 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7696 } 7697 } 7698 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7699 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7700 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7701 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7702 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7703 PetscCall(PetscFree2(d_nnz, o_nnz)); 7704 // 7705 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7706 nnz0 = nnz1 = 0; 7707 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7708 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7709 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7710 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7711 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7712 if (PetscRealPart(sv) > vfilter) { 7713 nnz1++; 7714 PetscInt cid = idx[jj] + Istart; //diag 7715 if (c != a) cid = garray[idx[jj]]; 7716 AA[ncol_row] = vals[jj]; 7717 AJ[ncol_row] = cid; 7718 ncol_row++; 7719 } 7720 } 7721 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7722 
PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7723 } 7724 } 7725 PetscCall(PetscFree2(AA, AJ)); 7726 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 7727 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7728 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7729 7730 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7731 7732 *filteredG = tGmat; 7733 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7734 PetscFunctionReturn(0); 7735 } 7736 7737 /* 7738 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7739 7740 Input Parameter: 7741 . Amat - matrix 7742 - symmetrize - make the result symmetric 7743 + scale - scale with diagonal 7744 7745 Output Parameter: 7746 . 
a_Gmat - output scalar graph >= 0 7747 7748 */ 7749 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7750 { 7751 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7752 MPI_Comm comm; 7753 Mat Gmat; 7754 PetscBool ismpiaij, isseqaij; 7755 Mat a, b, c; 7756 MatType jtype; 7757 7758 PetscFunctionBegin; 7759 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7760 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7761 PetscCall(MatGetSize(Amat, &MM, &NN)); 7762 PetscCall(MatGetBlockSize(Amat, &bs)); 7763 nloc = (Iend - Istart) / bs; 7764 7765 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7766 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7767 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7768 7769 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7770 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7771 implementation */ 7772 if (bs > 1) { 7773 PetscCall(MatGetType(Amat, &jtype)); 7774 PetscCall(MatCreate(comm, &Gmat)); 7775 PetscCall(MatSetType(Gmat, jtype)); 7776 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7777 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7778 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7779 PetscInt *d_nnz, *o_nnz; 7780 MatScalar *aa, val, AA[4096]; 7781 PetscInt *aj, *ai, AJ[4096], nc; 7782 if (isseqaij) { 7783 a = Amat; 7784 b = NULL; 7785 } else { 7786 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7787 a = d->A; 7788 b = d->B; 7789 } 7790 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7791 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7792 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7793 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz, nmax = 0; 7794 const PetscInt *cols; 7795 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7796 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7797 nnz[brow / bs] = jj / bs; 7798 if (jj % bs) ok = 0; 7799 if (cols) j0 = cols[0]; 7800 else j0 = -1; 7801 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7802 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7803 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7804 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7805 if (jj % bs) ok = 0; 7806 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7807 if (nnz[brow / bs] != jj / bs) ok = 0; 7808 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7809 } 7810 if (!ok) { 7811 PetscCall(PetscFree2(d_nnz, o_nnz)); 7812 goto old_bs; 7813 } 7814 } 7815 PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax); 7816 } 7817 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7818 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7819 PetscCall(PetscFree2(d_nnz, o_nnz)); 7820 // diag 7821 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7822 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7823 ai = aseq->i; 7824 n = ai[brow + 1] - ai[brow]; 7825 aj = aseq->j + ai[brow]; 7826 for (int k = 0; k < n; k += bs) { // block columns 7827 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7828 val = 0; 7829 for (int ii = 0; ii < bs; ii++) { // rows in block 7830 aa = aseq->a + ai[brow + ii] + k; 7831 for (int jj = 0; jj < bs; jj++) { // columns in block 7832 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7833 } 7834 } 7835 AA[k / bs] = val; 7836 } 7837 grow = Istart / bs + brow / bs; 7838 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7839 } 7840 // off-diag 7841 if (ismpiaij) { 7842 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7843 
const PetscScalar *vals; 7844 const PetscInt *cols, *garray = aij->garray; 7845 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7846 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7847 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7848 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7849 AA[k / bs] = 0; 7850 AJ[cidx] = garray[cols[k]] / bs; 7851 } 7852 nc = ncols / bs; 7853 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7854 for (int ii = 0; ii < bs; ii++) { // rows in block 7855 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7856 for (int k = 0; k < ncols; k += bs) { 7857 for (int jj = 0; jj < bs; jj++) { // cols in block 7858 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7859 } 7860 } 7861 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7862 } 7863 grow = Istart / bs + brow / bs; 7864 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7865 } 7866 } 7867 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7868 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7869 } else { 7870 const PetscScalar *vals; 7871 const PetscInt *idx; 7872 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7873 old_bs: 7874 /* 7875 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7876 */ 7877 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7878 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7879 if (isseqaij) { 7880 PetscInt max_d_nnz; 7881 /* 7882 Determine exact preallocation count for (sequential) scalar matrix 7883 */ 7884 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7885 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7886 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7887 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7888 PetscCall(PetscFree3(w0, w1, w2)); 7889 } else if (ismpiaij) { 7890 Mat Daij, Oaij; 7891 const PetscInt *garray; 7892 PetscInt max_d_nnz; 7893 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7894 /* 7895 Determine exact preallocation count for diagonal block portion of scalar matrix 7896 */ 7897 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7898 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7899 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7900 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7901 PetscCall(PetscFree3(w0, w1, w2)); 7902 /* 7903 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7904 */ 7905 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7906 o_nnz[jj] = 0; 7907 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7908 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7909 o_nnz[jj] += ncols; 7910 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7911 } 7912 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7913 } 7914 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7915 /* get scalar copy (norms) of matrix */ 7916 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7917 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7918 PetscCall(PetscFree2(d_nnz, o_nnz)); 7919 for (Ii = Istart; Ii < Iend; Ii++) { 7920 
PetscInt dest_row = Ii / bs; 7921 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7922 for (jj = 0; jj < ncols; jj++) { 7923 PetscInt dest_col = idx[jj] / bs; 7924 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7925 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7926 } 7927 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7928 } 7929 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7930 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7931 } 7932 } else { 7933 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7934 else { 7935 Gmat = Amat; 7936 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7937 } 7938 if (isseqaij) { 7939 a = Gmat; 7940 b = NULL; 7941 } else { 7942 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7943 a = d->A; 7944 b = d->B; 7945 } 7946 if (filter >= 0 || scale) { 7947 /* take absolute value of each entry */ 7948 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7949 MatInfo info; 7950 PetscScalar *avals; 7951 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7952 PetscCall(MatSeqAIJGetArray(c, &avals)); 7953 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7954 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7955 } 7956 } 7957 } 7958 if (symmetrize) { 7959 PetscBool isset, issym; 7960 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7961 if (!isset || !issym) { 7962 Mat matTrans; 7963 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7964 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7965 PetscCall(MatDestroy(&matTrans)); 7966 } 7967 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7968 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7969 if (scale) { 7970 /* scale c for all diagonal values = 1 or -1 */ 7971 Vec diag; 7972 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 7973 PetscCall(MatGetDiagonal(Gmat, diag)); 7974 PetscCall(VecReciprocal(diag)); 7975 PetscCall(VecSqrtAbs(diag)); 7976 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 7977 PetscCall(VecDestroy(&diag)); 7978 } 7979 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 7980 7981 if (filter >= 0) { 7982 Mat Fmat = NULL; /* some silly compiler needs this */ 7983 7984 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 7985 PetscCall(MatDestroy(&Gmat)); 7986 Gmat = Fmat; 7987 } 7988 *a_Gmat = Gmat; 7989 PetscFunctionReturn(0); 7990 } 7991 7992 /* 7993 Special version for direct calls from Fortran 7994 */ 7995 #include <petsc/private/fortranimpl.h> 7996 7997 /* Change these macros so can be used in void function */ 7998 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 7999 #undef PetscCall 8000 #define PetscCall(...) \ 8001 do { \ 8002 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8003 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8004 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8005 return; \ 8006 } \ 8007 } while (0) 8008 8009 #undef SETERRQ 8010 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the name the Fortran compiler expects (all-caps, no
   underscore, or trailing underscore, depending on the name-mangling scheme). */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues() specialized for MATMPIAIJ.

  Called directly from Fortran, so it returns void and reports failures through
  the trailing error argument instead of a PetscErrorCode return value; the
  PetscCall()/SETERRQ() macros redefined immediately above write into *_ierr and
  return early on error.

  Input Parameters (all pointers, Fortran pass-by-reference):
+ mmat  - the MATMPIAIJ matrix
. mm    - number of rows in v
. im    - global row indices (negative entries are skipped)
. mn    - number of columns in v
. in    - global column indices (negative entries are skipped)
. v     - the values; layout is row- or column-major according to aij->roworiented
. maddv - INSERT_VALUES or ADD_VALUES
- _ierr - (output) error code, set on failure

  Locally-owned entries are inserted straight into the diagonal (A) or
  off-diagonal (B) SeqAIJ blocks via the MatSetValues_SeqAIJ_{A,B}_Private
  macros; rows owned by other processes are queued in the stash (unless
  donotstash is set) for communication during assembly.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; mixing INSERT_VALUES and ADD_VALUES
     before an assembly is an error. */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A; /* diagonal block (columns owned by this rank) */
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    /* Zeros may be dropped only when adding, and never on the diagonal entry. */
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B; /* off-diagonal block (columns owned by other ranks) */
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed (and updated) by the MatSetValues_SeqAIJ_{A,B}_Private
       macros: current row pointers, row lengths/capacities, and binary-search
       bounds into the row's column list. Do not rename -- the macros reference
       these exact identifiers. */
    PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt    nonew = a->nonew;
    MatScalar  *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index: silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned locally: set up per-row search state for both blocks. */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the matrix option. */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column owned locally: goes into the diagonal block A. */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Off-process column: goes into the off-diagonal block B. */
            if (mat->was_assembled) {
              /* After assembly B uses compacted local column numbering; map the
                 global column through the colmap (stored 1-based; 0 = absent). */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not present in B's current nonzero pattern and new
                   nonzeros are allowed: disassemble back to global numbering. */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another process: stash it for MatAssemblyBegin/End. */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ