#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically
   switches over to use inodes when enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
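/*
   A minimal usage sketch (illustrative only, not part of this implementation): with `MATAIJ` a user can call
   both preallocation routines so the same code runs on one or many MPI ranks; the routine that does not match
   the actual type is ignored. The communicator comm, the sizes m, n, M, N, and the per-row nonzero estimates
   d_nz and o_nz below are placeholders.

     Mat A;
     PetscCall(MatCreate(comm, &A));
     PetscCall(MatSetSizes(A, m, n, M, N));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSeqAIJSetPreallocation(A, d_nz, NULL));
     PetscCall(MatMPIAIJSetPreallocation(A, d_nz, NULL, o_nz, NULL));
*/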
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data;
  PetscInt i, m, n, *garray = aij->garray;
  Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* touch the value arrays so any device copies are synced to the host, since a_aij->a and b_aij->a are read directly below */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  IS sis, gis;
  const PetscInt *isis, *igis;
  PetscInt n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process stores an order-N integer array) but access is fast.
*/
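/*
   Illustrative sketch (hypothetical numbers, not taken from this file) of the convention built below:
   if the off-diagonal block references the global columns garray[] = {3, 7, 12} with local indices
   0, 1, 2, the colmap stores the values 1, 2, 3 (local index + 1), so a lookup result of 0 means
   "column not present on this process" and callers subtract 1 to recover the local index (see
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()). In the hash-table variant the key is also
   shifted by one (global column + 1).
*/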
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops() will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1 = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2 = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool ignorezeroentries = a->ignorezeroentries;
  Mat B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar *aa, *ba;
  PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt nonew;
  MatScalar *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      lastcol1 = -1;
      rp1 = aj + ai[row];
      ap1 = aa + ai[row];
      rmax1 = aimax[row];
      nrow1 = ailen[row];
      low1 = 0;
      high1 = nrow1;
      lastcol2 = -1;
      rp2 = bj + bi[row];
      ap2 = ba + bi[row];
      rmax2 = bimax[row];
      nrow2 = bilen[row];
      low2 = 0;
      high2 = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi = b->i;
              bilen = b->ilen;
              bj = b->j;
              ba = b->a;
              rp2 = bj + bi[row];
              ap2 = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2 = 0;
              high2 = nrow2;
              bm = aij->B->rmap->n;
              ba = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, ba might have been freed due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
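/*
   Illustrative sketch (hypothetical numbers, not taken from this file) of the input expected below:
   suppose this process owns columns [cstart, cend) = [4, 8) and its two local rows have global
   columns {4, 6, 9} and {1, 5}. Then the CSR input is

     mat_i = {0, 3, 5}
     mat_j = {4, 6, 9,  1, 5}   (sorted within each row)

   and the routine places columns 4, 6 and 5 (shifted by -cstart) in the diagonal block, and
   columns 9 and 1 (still in global numbering) in the off-diagonal block.
*/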
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt diag_so_far = 0, dnz;
  PetscInt offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt n;
  PetscInt i, j, rstart, ncols, flg;
  PetscInt *row, *col;
  PetscBool other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt *lrows;
  PetscInt r, len;
  PetscBool cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt nnwA, nnwB;
    PetscBool nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt n = A->rmap->n;
  PetscInt i, j, r, m, len = 0;
  PetscInt *lrows, *owners = A->rmap->range;
  PetscMPIInt p = 0;
  PetscSFNode *rrows;
  PetscSF sf;
  const PetscScalar *xx;
  PetscScalar *bb, *mask, *aij_a;
  Vec xmask, lmask;
  Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt *aj, *ii, *ridx;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m = aij->compressedrow.nrows;
    ii = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt nt;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij;
  Mat Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS Me, Notme;
  PetscInt M, N, first, last, *notme, i;
  PetscBool lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *)Bmat->data;
  Bdia = Bij->A;
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt *rowlens;
  PetscInt *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M = mat->rmap->N;
  N = mat->cmap->N;
  m = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt rank = aij->rank, size = aij->size;
  PetscBool isdraw, iascii, isbinary;
  PetscViewer sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec bb1 = NULL;
  PetscBool hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));
((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1436 if (flag & SOR_ZERO_INITIAL_GUESS) { 1437 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1438 its--; 1439 } 1440 1441 while (its--) { 1442 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1443 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1444 1445 /* update rhs: bb1 = bb - B*x */ 1446 PetscCall(VecScale(mat->lvec, -1.0)); 1447 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1448 1449 /* local sweep */ 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1451 } 1452 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1453 if (flag & SOR_ZERO_INITIAL_GUESS) { 1454 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1455 its--; 1456 } 1457 while (its--) { 1458 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1459 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1460 1461 /* update rhs: bb1 = bb - B*x */ 1462 PetscCall(VecScale(mat->lvec, -1.0)); 1463 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1464 1465 /* local sweep */ 1466 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1467 } 1468 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1469 if (flag & SOR_ZERO_INITIAL_GUESS) { 1470 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1471 its--; 1472 } 1473 while (its--) { 1474 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1475 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1476 1477 /* update rhs: bb1 = bb - B*x */ 1478 PetscCall(VecScale(mat->lvec, -1.0)); 1479 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1480 1481 /* local sweep */ 1482 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1483 } 1484 } else if (flag & SOR_EISENSTAT) { 1485 Vec xx1; 1486 1487 PetscCall(VecDuplicate(bb, &xx1)); 1488 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1489 1490 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1491 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1492 if (!mat->diag) { 1493 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1494 PetscCall(MatGetDiagonal(matin, mat->diag)); 1495 } 1496 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1497 if (hasop) { 1498 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1499 } else { 1500 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1501 } 1502 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1503 1504 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1505 1506 /* local sweep */ 1507 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1508 PetscCall(VecAXPY(xx, 1.0, xx1)); 1509 PetscCall(VecDestroy(&xx1)); 1510 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1511 1512 PetscCall(VecDestroy(&bb1)); 1513 1514 matin->factorerrortype = mat->A->factorerrortype; 1515 PetscFunctionReturn(PETSC_SUCCESS); 1516 } 1517 1518 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 
1519 { 1520 Mat aA, aB, Aperm; 1521 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1522 PetscScalar *aa, *ba; 1523 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1524 PetscSF rowsf, sf; 1525 IS parcolp = NULL; 1526 PetscBool done; 1527 1528 PetscFunctionBegin; 1529 PetscCall(MatGetLocalSize(A, &m, &n)); 1530 PetscCall(ISGetIndices(rowp, &rwant)); 1531 PetscCall(ISGetIndices(colp, &cwant)); 1532 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1533 1534 /* Invert row permutation to find out where my rows should go */ 1535 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1536 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1537 PetscCall(PetscSFSetFromOptions(rowsf)); 1538 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1539 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1540 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1541 1542 /* Invert column permutation to find out where my columns should go */ 1543 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1544 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1545 PetscCall(PetscSFSetFromOptions(sf)); 1546 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1547 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1548 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1549 PetscCall(PetscSFDestroy(&sf)); 1550 1551 PetscCall(ISRestoreIndices(rowp, &rwant)); 1552 PetscCall(ISRestoreIndices(colp, &cwant)); 1553 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1554 1555 /* Find out where my gcols should go */ 1556 PetscCall(MatGetSize(aB, NULL, &ng)); 1557 PetscCall(PetscMalloc1(ng, &gcdest)); 1558 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1559 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1560 PetscCall(PetscSFSetFromOptions(sf)); 1561 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1562 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1563 PetscCall(PetscSFDestroy(&sf)); 1564 1565 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1566 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1567 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1568 for (i = 0; i < m; i++) { 1569 PetscInt row = rdest[i]; 1570 PetscMPIInt rowner; 1571 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1572 for (j = ai[i]; j < ai[i + 1]; j++) { 1573 PetscInt col = cdest[aj[j]]; 1574 PetscMPIInt cowner; 1575 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1576 if (rowner == cowner) dnnz[i]++; 1577 else onnz[i]++; 1578 } 1579 for (j = bi[i]; j < bi[i + 1]; j++) { 1580 PetscInt col = gcdest[bj[j]]; 1581 PetscMPIInt cowner; 1582 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1583 if (rowner == cowner) dnnz[i]++; 1584 else onnz[i]++; 1585 } 1586 } 1587 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1588 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1589 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1590 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1591 PetscCall(PetscSFDestroy(&rowsf)); 1592 1593 
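/* tdnnz[] and tonnz[] now hold, for each row of the permuted matrix owned by this process, the number of
   entries that fall in its diagonal and off-diagonal blocks (communicated through the rowsf star forest).
   They are passed as the d_nnz/o_nnz preallocation arrays of MatCreateAIJ() below, the same convention an
   application would use when preallocating, e.g. (the names in this one-line sketch are placeholders):

     PetscCall(MatCreateAIJ(comm, m, n, M, N, 0, d_nnz, 0, o_nnz, &A));
*/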
PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1594 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1595 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1596 for (i = 0; i < m; i++) { 1597 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1598 PetscInt j0, rowlen; 1599 rowlen = ai[i + 1] - ai[i]; 1600 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1601 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1602 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1603 } 1604 rowlen = bi[i + 1] - bi[i]; 1605 for (j0 = j = 0; j < rowlen; j0 = j) { 1606 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1607 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1608 } 1609 } 1610 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1611 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1612 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1613 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1614 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1615 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1616 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1617 PetscCall(PetscFree3(work, rdest, cdest)); 1618 PetscCall(PetscFree(gcdest)); 1619 if (parcolp) PetscCall(ISDestroy(&colp)); 1620 *B = Aperm; 1621 PetscFunctionReturn(PETSC_SUCCESS); 1622 } 1623 1624 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1625 { 1626 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1627 1628 PetscFunctionBegin; 1629 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1630 if (ghosts) *ghosts = aij->garray; 1631 PetscFunctionReturn(PETSC_SUCCESS); 1632 } 1633 1634 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1635 { 1636 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1637 Mat A = mat->A, B = mat->B; 1638 PetscLogDouble isend[5], irecv[5]; 1639 1640 PetscFunctionBegin; 1641 info->block_size = 1.0; 1642 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1643 1644 isend[0] = info->nz_used; 1645 isend[1] = info->nz_allocated; 1646 isend[2] = info->nz_unneeded; 1647 isend[3] = info->memory; 1648 isend[4] = info->mallocs; 1649 1650 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1651 1652 isend[0] += info->nz_used; 1653 isend[1] += info->nz_allocated; 1654 isend[2] += info->nz_unneeded; 1655 isend[3] += info->memory; 1656 isend[4] += info->mallocs; 1657 if (flag == MAT_LOCAL) { 1658 info->nz_used = isend[0]; 1659 info->nz_allocated = isend[1]; 1660 info->nz_unneeded = isend[2]; 1661 info->memory = isend[3]; 1662 info->mallocs = isend[4]; 1663 } else if (flag == MAT_GLOBAL_MAX) { 1664 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } else if (flag == MAT_GLOBAL_SUM) { 1672 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1673 1674 info->nz_used = irecv[0]; 1675 info->nz_allocated = irecv[1]; 1676 info->nz_unneeded = irecv[2]; 1677 info->memory = irecv[3]; 1678 info->mallocs = irecv[4]; 1679 } 1680 info->fill_ratio_given = 0; /* no parallel 
LU/ILU/Cholesky */ 1681 info->fill_ratio_needed = 0; 1682 info->factor_mallocs = 0; 1683 PetscFunctionReturn(PETSC_SUCCESS); 1684 } 1685 1686 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1687 { 1688 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1689 1690 PetscFunctionBegin; 1691 switch (op) { 1692 case MAT_NEW_NONZERO_LOCATIONS: 1693 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1694 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1695 case MAT_KEEP_NONZERO_PATTERN: 1696 case MAT_NEW_NONZERO_LOCATION_ERR: 1697 case MAT_USE_INODES: 1698 case MAT_IGNORE_ZERO_ENTRIES: 1699 case MAT_FORM_EXPLICIT_TRANSPOSE: 1700 MatCheckPreallocated(A, 1); 1701 PetscCall(MatSetOption(a->A, op, flg)); 1702 PetscCall(MatSetOption(a->B, op, flg)); 1703 break; 1704 case MAT_ROW_ORIENTED: 1705 MatCheckPreallocated(A, 1); 1706 a->roworiented = flg; 1707 1708 PetscCall(MatSetOption(a->A, op, flg)); 1709 PetscCall(MatSetOption(a->B, op, flg)); 1710 break; 1711 case MAT_FORCE_DIAGONAL_ENTRIES: 1712 case MAT_SORTED_FULL: 1713 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1714 break; 1715 case MAT_IGNORE_OFF_PROC_ENTRIES: 1716 a->donotstash = flg; 1717 break; 1718 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1719 case MAT_SPD: 1720 case MAT_SYMMETRIC: 1721 case MAT_STRUCTURALLY_SYMMETRIC: 1722 case MAT_HERMITIAN: 1723 case MAT_SYMMETRY_ETERNAL: 1724 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1725 case MAT_SPD_ETERNAL: 1726 /* if the diagonal matrix is square it inherits some of the properties above */ 1727 break; 1728 case MAT_SUBMAT_SINGLEIS: 1729 A->submat_singleis = flg; 1730 break; 1731 case MAT_STRUCTURE_ONLY: 1732 /* The option is handled directly by MatSetOption() */ 1733 break; 1734 default: 1735 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1736 } 1737 PetscFunctionReturn(PETSC_SUCCESS); 1738 } 1739 1740 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1741 { 1742 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1743 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1744 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1745 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1746 PetscInt *cmap, *idx_p; 1747 1748 PetscFunctionBegin; 1749 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1750 mat->getrowactive = PETSC_TRUE; 1751 1752 if (!mat->rowvalues && (idx || v)) { 1753 /* 1754 allocate enough space to hold information from the longest row. 
1755 */ 1756 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1757 PetscInt max = 1, tmp; 1758 for (i = 0; i < matin->rmap->n; i++) { 1759 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1760 if (max < tmp) max = tmp; 1761 } 1762 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1763 } 1764 1765 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1766 lrow = row - rstart; 1767 1768 pvA = &vworkA; 1769 pcA = &cworkA; 1770 pvB = &vworkB; 1771 pcB = &cworkB; 1772 if (!v) { 1773 pvA = NULL; 1774 pvB = NULL; 1775 } 1776 if (!idx) { 1777 pcA = NULL; 1778 if (!v) pcB = NULL; 1779 } 1780 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1781 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1782 nztot = nzA + nzB; 1783 1784 cmap = mat->garray; 1785 if (v || idx) { 1786 if (nztot) { 1787 /* Sort by increasing column numbers, assuming A and B already sorted */ 1788 PetscInt imark = -1; 1789 if (v) { 1790 *v = v_p = mat->rowvalues; 1791 for (i = 0; i < nzB; i++) { 1792 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1793 else break; 1794 } 1795 imark = i; 1796 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1797 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1798 } 1799 if (idx) { 1800 *idx = idx_p = mat->rowindices; 1801 if (imark > -1) { 1802 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1803 } else { 1804 for (i = 0; i < nzB; i++) { 1805 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1806 else break; 1807 } 1808 imark = i; 1809 } 1810 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1811 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1812 } 1813 } else { 1814 if (idx) *idx = NULL; 1815 if (v) *v = NULL; 1816 } 1817 } 1818 *nz = nztot; 1819 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1820 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1821 PetscFunctionReturn(PETSC_SUCCESS); 1822 } 1823 1824 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1825 { 1826 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1827 1828 PetscFunctionBegin; 1829 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1830 aij->getrowactive = PETSC_FALSE; 1831 PetscFunctionReturn(PETSC_SUCCESS); 1832 } 1833 1834 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1835 { 1836 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1837 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1838 PetscInt i, j, cstart = mat->cmap->rstart; 1839 PetscReal sum = 0.0; 1840 const MatScalar *v, *amata, *bmata; 1841 1842 PetscFunctionBegin; 1843 if (aij->size == 1) { 1844 PetscCall(MatNorm(aij->A, type, norm)); 1845 } else { 1846 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1847 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1848 if (type == NORM_FROBENIUS) { 1849 v = amata; 1850 for (i = 0; i < amat->nz; i++) { 1851 sum += PetscRealPart(PetscConj(*v) * (*v)); 1852 v++; 1853 } 1854 v = bmata; 1855 for (i = 0; i < bmat->nz; i++) { 1856 sum += PetscRealPart(PetscConj(*v) * (*v)); 1857 v++; 1858 } 1859 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1860 *norm = PetscSqrtReal(*norm); 1861 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1862 } else if (type == NORM_1) { /* max column 
norm */ 1863 PetscReal *tmp, *tmp2; 1864 PetscInt *jj, *garray = aij->garray; 1865 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1866 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1867 *norm = 0.0; 1868 v = amata; 1869 jj = amat->j; 1870 for (j = 0; j < amat->nz; j++) { 1871 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1872 v++; 1873 } 1874 v = bmata; 1875 jj = bmat->j; 1876 for (j = 0; j < bmat->nz; j++) { 1877 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1878 v++; 1879 } 1880 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1881 for (j = 0; j < mat->cmap->N; j++) { 1882 if (tmp2[j] > *norm) *norm = tmp2[j]; 1883 } 1884 PetscCall(PetscFree(tmp)); 1885 PetscCall(PetscFree(tmp2)); 1886 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1887 } else if (type == NORM_INFINITY) { /* max row norm */ 1888 PetscReal ntemp = 0.0; 1889 for (j = 0; j < aij->A->rmap->n; j++) { 1890 v = amata + amat->i[j]; 1891 sum = 0.0; 1892 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1893 sum += PetscAbsScalar(*v); 1894 v++; 1895 } 1896 v = bmata + bmat->i[j]; 1897 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1898 sum += PetscAbsScalar(*v); 1899 v++; 1900 } 1901 if (sum > ntemp) ntemp = sum; 1902 } 1903 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1904 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1905 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1906 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1907 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1908 } 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1913 { 1914 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1915 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1916 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1917 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1918 Mat B, A_diag, *B_diag; 1919 const MatScalar *pbv, *bv; 1920 1921 PetscFunctionBegin; 1922 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1923 ma = A->rmap->n; 1924 na = A->cmap->n; 1925 mb = a->B->rmap->n; 1926 nb = a->B->cmap->n; 1927 ai = Aloc->i; 1928 aj = Aloc->j; 1929 bi = Bloc->i; 1930 bj = Bloc->j; 1931 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1932 PetscInt *d_nnz, *g_nnz, *o_nnz; 1933 PetscSFNode *oloc; 1934 PETSC_UNUSED PetscSF sf; 1935 1936 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1937 /* compute d_nnz for preallocation */ 1938 PetscCall(PetscArrayzero(d_nnz, na)); 1939 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1940 /* compute local off-diagonal contributions */ 1941 PetscCall(PetscArrayzero(g_nnz, nb)); 1942 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1943 /* map those to global */ 1944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1945 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1946 PetscCall(PetscSFSetFromOptions(sf)); 1947 PetscCall(PetscArrayzero(o_nnz, na)); 1948 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1949 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1950 PetscCall(PetscSFDestroy(&sf)); 1951 1952 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1953 
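/* The transpose swaps the row and column layouts of A: B gets local sizes (A->cmap->n, A->rmap->n) and
   global sizes (N, M), and is preallocated with the per-column counts gathered above (d_nnz for the
   diagonal block, o_nnz for the off-diagonal block). */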
PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1954 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1955 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1956 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1957 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1958 } else { 1959 B = *matout; 1960 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1961 } 1962 1963 b = (Mat_MPIAIJ *)B->data; 1964 A_diag = a->A; 1965 B_diag = &b->A; 1966 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1967 A_diag_ncol = A_diag->cmap->N; 1968 B_diag_ilen = sub_B_diag->ilen; 1969 B_diag_i = sub_B_diag->i; 1970 1971 /* Set ilen for diagonal of B */ 1972 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1973 1974 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1975 very quickly (=without using MatSetValues), because all writes are local. */ 1976 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1977 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1978 1979 /* copy over the B part */ 1980 PetscCall(PetscMalloc1(bi[mb], &cols)); 1981 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1982 pbv = bv; 1983 row = A->rmap->rstart; 1984 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1985 cols_tmp = cols; 1986 for (i = 0; i < mb; i++) { 1987 ncol = bi[i + 1] - bi[i]; 1988 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1989 row++; 1990 pbv += ncol; 1991 cols_tmp += ncol; 1992 } 1993 PetscCall(PetscFree(cols)); 1994 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1995 1996 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1997 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1998 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1999 *matout = B; 2000 } else { 2001 PetscCall(MatHeaderMerge(A, &B)); 2002 } 2003 PetscFunctionReturn(PETSC_SUCCESS); 2004 } 2005 2006 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 2007 { 2008 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2009 Mat a = aij->A, b = aij->B; 2010 PetscInt s1, s2, s3; 2011 2012 PetscFunctionBegin; 2013 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2014 if (rr) { 2015 PetscCall(VecGetLocalSize(rr, &s1)); 2016 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2017 /* Overlap communication with computation. 
*/ 2018 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2019 } 2020 if (ll) { 2021 PetscCall(VecGetLocalSize(ll, &s1)); 2022 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2023 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2024 } 2025 /* scale the diagonal block */ 2026 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2027 2028 if (rr) { 2029 /* Do a scatter end and then right scale the off-diagonal block */ 2030 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2031 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2032 } 2033 PetscFunctionReturn(PETSC_SUCCESS); 2034 } 2035 2036 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2037 { 2038 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2039 2040 PetscFunctionBegin; 2041 PetscCall(MatSetUnfactored(a->A)); 2042 PetscFunctionReturn(PETSC_SUCCESS); 2043 } 2044 2045 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2046 { 2047 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2048 Mat a, b, c, d; 2049 PetscBool flg; 2050 2051 PetscFunctionBegin; 2052 a = matA->A; 2053 b = matA->B; 2054 c = matB->A; 2055 d = matB->B; 2056 2057 PetscCall(MatEqual(a, c, &flg)); 2058 if (flg) PetscCall(MatEqual(b, d, &flg)); 2059 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2060 PetscFunctionReturn(PETSC_SUCCESS); 2061 } 2062 2063 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2064 { 2065 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2066 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2067 2068 PetscFunctionBegin; 2069 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2070 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2071 /* because of the column compression in the off-processor part of the matrix a->B, 2072 the number of columns in a->B and b->B may be different, hence we cannot call 2073 the MatCopy() directly on the two parts. If need be, we can provide a more 2074 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2075 then copying the submatrices */ 2076 PetscCall(MatCopy_Basic(A, B, str)); 2077 } else { 2078 PetscCall(MatCopy(a->A, b->A, str)); 2079 PetscCall(MatCopy(a->B, b->B, str)); 2080 } 2081 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2082 PetscFunctionReturn(PETSC_SUCCESS); 2083 } 2084 2085 /* 2086 Computes the number of nonzeros per row needed for preallocation when X and Y 2087 have different nonzero structure. 
2088 */ 2089 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2090 { 2091 PetscInt i, j, k, nzx, nzy; 2092 2093 PetscFunctionBegin; 2094 /* Set the number of nonzeros in the new matrix */ 2095 for (i = 0; i < m; i++) { 2096 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2097 nzx = xi[i + 1] - xi[i]; 2098 nzy = yi[i + 1] - yi[i]; 2099 nnz[i] = 0; 2100 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2101 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2102 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2103 nnz[i]++; 2104 } 2105 for (; k < nzy; k++) nnz[i]++; 2106 } 2107 PetscFunctionReturn(PETSC_SUCCESS); 2108 } 2109 2110 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2111 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2112 { 2113 PetscInt m = Y->rmap->N; 2114 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2115 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2116 2117 PetscFunctionBegin; 2118 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2119 PetscFunctionReturn(PETSC_SUCCESS); 2120 } 2121 2122 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2123 { 2124 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2125 2126 PetscFunctionBegin; 2127 if (str == SAME_NONZERO_PATTERN) { 2128 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2129 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2130 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2131 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2132 } else { 2133 Mat B; 2134 PetscInt *nnz_d, *nnz_o; 2135 2136 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2137 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2138 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2139 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2140 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2141 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2142 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2143 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2144 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2145 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2146 PetscCall(MatHeaderMerge(Y, &B)); 2147 PetscCall(PetscFree(nnz_d)); 2148 PetscCall(PetscFree(nnz_o)); 2149 } 2150 PetscFunctionReturn(PETSC_SUCCESS); 2151 } 2152 2153 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2154 2155 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2156 { 2157 PetscFunctionBegin; 2158 if (PetscDefined(USE_COMPLEX)) { 2159 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2160 2161 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2162 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2163 } 2164 PetscFunctionReturn(PETSC_SUCCESS); 2165 } 2166 2167 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2168 { 2169 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2170 2171 PetscFunctionBegin; 2172 PetscCall(MatRealPart(a->A)); 2173 PetscCall(MatRealPart(a->B)); 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2178 { 2179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2180 2181 PetscFunctionBegin; 2182 PetscCall(MatImaginaryPart(a->A)); 
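/* the off-diagonal block is updated in place as well, so the whole parallel matrix stays consistent */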
2183 PetscCall(MatImaginaryPart(a->B)); 2184 PetscFunctionReturn(PETSC_SUCCESS); 2185 } 2186 2187 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2188 { 2189 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2190 PetscInt i, *idxb = NULL, m = A->rmap->n; 2191 PetscScalar *va, *vv; 2192 Vec vB, vA; 2193 const PetscScalar *vb; 2194 2195 PetscFunctionBegin; 2196 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2197 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2198 2199 PetscCall(VecGetArrayWrite(vA, &va)); 2200 if (idx) { 2201 for (i = 0; i < m; i++) { 2202 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2203 } 2204 } 2205 2206 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2207 PetscCall(PetscMalloc1(m, &idxb)); 2208 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2209 2210 PetscCall(VecGetArrayWrite(v, &vv)); 2211 PetscCall(VecGetArrayRead(vB, &vb)); 2212 for (i = 0; i < m; i++) { 2213 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2214 vv[i] = vb[i]; 2215 if (idx) idx[i] = a->garray[idxb[i]]; 2216 } else { 2217 vv[i] = va[i]; 2218 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2219 } 2220 } 2221 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2222 PetscCall(VecRestoreArrayWrite(vA, &va)); 2223 PetscCall(VecRestoreArrayRead(vB, &vb)); 2224 PetscCall(PetscFree(idxb)); 2225 PetscCall(VecDestroy(&vA)); 2226 PetscCall(VecDestroy(&vB)); 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231 { 2232 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233 PetscInt m = A->rmap->n, n = A->cmap->n; 2234 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235 PetscInt *cmap = mat->garray; 2236 PetscInt *diagIdx, *offdiagIdx; 2237 Vec diagV, offdiagV; 2238 PetscScalar *a, *diagA, *offdiagA; 2239 const PetscScalar *ba, *bav; 2240 PetscInt r, j, col, ncols, *bi, *bj; 2241 Mat B = mat->B; 2242 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243 2244 PetscFunctionBegin; 2245 /* When a process holds entire A and other processes have no entry */ 2246 if (A->cmap->N == n) { 2247 PetscCall(VecGetArrayWrite(v, &diagA)); 2248 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2250 PetscCall(VecDestroy(&diagV)); 2251 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } else if (n == 0) { 2254 if (m) { 2255 PetscCall(VecGetArrayWrite(v, &a)); 2256 for (r = 0; r < m; r++) { 2257 a[r] = 0.0; 2258 if (idx) idx[r] = -1; 2259 } 2260 PetscCall(VecRestoreArrayWrite(v, &a)); 2261 } 2262 PetscFunctionReturn(PETSC_SUCCESS); 2263 } 2264 2265 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2266 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2267 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2268 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2269 2270 /* Get offdiagIdx[] for implicit 0.0 */ 2271 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2272 ba = bav; 2273 bi = b->i; 2274 bj = b->j; 2275 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2276 for (r = 0; r < m; r++) { 2277 ncols = bi[r + 1] - bi[r]; 2278 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2279 offdiagA[r] = *ba; 2280 offdiagIdx[r] = cmap[0]; 2281 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2282 offdiagA[r] = 0.0; 2283 2284 /* Find first hole in the cmap */ 2285 for (j = 0; j < ncols; j++) { 2286 col = cmap[bj[j]]; /* global column 
number = cmap[B column number] */ 2287 if (col > j && j < cstart) { 2288 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2289 break; 2290 } else if (col > j + n && j >= cstart) { 2291 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2292 break; 2293 } 2294 } 2295 if (j == ncols && ncols < A->cmap->N - n) { 2296 /* a hole is outside compressed Bcols */ 2297 if (ncols == 0) { 2298 if (cstart) { 2299 offdiagIdx[r] = 0; 2300 } else offdiagIdx[r] = cend; 2301 } else { /* ncols > 0 */ 2302 offdiagIdx[r] = cmap[ncols - 1] + 1; 2303 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2304 } 2305 } 2306 } 2307 2308 for (j = 0; j < ncols; j++) { 2309 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2310 offdiagA[r] = *ba; 2311 offdiagIdx[r] = cmap[*bj]; 2312 } 2313 ba++; 2314 bj++; 2315 } 2316 } 2317 2318 PetscCall(VecGetArrayWrite(v, &a)); 2319 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2320 for (r = 0; r < m; ++r) { 2321 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2322 a[r] = diagA[r]; 2323 if (idx) idx[r] = cstart + diagIdx[r]; 2324 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2325 a[r] = diagA[r]; 2326 if (idx) { 2327 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2328 idx[r] = cstart + diagIdx[r]; 2329 } else idx[r] = offdiagIdx[r]; 2330 } 2331 } else { 2332 a[r] = offdiagA[r]; 2333 if (idx) idx[r] = offdiagIdx[r]; 2334 } 2335 } 2336 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2337 PetscCall(VecRestoreArrayWrite(v, &a)); 2338 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2339 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2340 PetscCall(VecDestroy(&diagV)); 2341 PetscCall(VecDestroy(&offdiagV)); 2342 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2343 PetscFunctionReturn(PETSC_SUCCESS); 2344 } 2345 2346 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2347 { 2348 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2349 PetscInt m = A->rmap->n, n = A->cmap->n; 2350 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2351 PetscInt *cmap = mat->garray; 2352 PetscInt *diagIdx, *offdiagIdx; 2353 Vec diagV, offdiagV; 2354 PetscScalar *a, *diagA, *offdiagA; 2355 const PetscScalar *ba, *bav; 2356 PetscInt r, j, col, ncols, *bi, *bj; 2357 Mat B = mat->B; 2358 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2359 2360 PetscFunctionBegin; 2361 /* When a process holds entire A and other processes have no entry */ 2362 if (A->cmap->N == n) { 2363 PetscCall(VecGetArrayWrite(v, &diagA)); 2364 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2365 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2366 PetscCall(VecDestroy(&diagV)); 2367 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2368 PetscFunctionReturn(PETSC_SUCCESS); 2369 } else if (n == 0) { 2370 if (m) { 2371 PetscCall(VecGetArrayWrite(v, &a)); 2372 for (r = 0; r < m; r++) { 2373 a[r] = PETSC_MAX_REAL; 2374 if (idx) idx[r] = -1; 2375 } 2376 PetscCall(VecRestoreArrayWrite(v, &a)); 2377 } 2378 PetscFunctionReturn(PETSC_SUCCESS); 2379 } 2380 2381 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2382 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2383 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2384 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2385 2386 /* Get offdiagIdx[] for implicit 0.0 */ 2387 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2388 ba = bav; 2389 bi = b->i; 2390 bj = b->j; 2391 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2392 for (r = 0; r < m; r++) { 2393 
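/* For each local row, find the smallest entry (by real part) of the off-diagonal block B, treating the
   columns dropped by the column compression as implicit zeros; offdiagA[r] and offdiagIdx[r] record the
   value and the global column of the winner, to be merged with the diagonal-block result below. */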
ncols = bi[r + 1] - bi[r]; 2394 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2395 offdiagA[r] = *ba; 2396 offdiagIdx[r] = cmap[0]; 2397 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2398 offdiagA[r] = 0.0; 2399 2400 /* Find first hole in the cmap */ 2401 for (j = 0; j < ncols; j++) { 2402 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2403 if (col > j && j < cstart) { 2404 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2405 break; 2406 } else if (col > j + n && j >= cstart) { 2407 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2408 break; 2409 } 2410 } 2411 if (j == ncols && ncols < A->cmap->N - n) { 2412 /* a hole is outside compressed Bcols */ 2413 if (ncols == 0) { 2414 if (cstart) { 2415 offdiagIdx[r] = 0; 2416 } else offdiagIdx[r] = cend; 2417 } else { /* ncols > 0 */ 2418 offdiagIdx[r] = cmap[ncols - 1] + 1; 2419 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2420 } 2421 } 2422 } 2423 2424 for (j = 0; j < ncols; j++) { 2425 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2426 offdiagA[r] = *ba; 2427 offdiagIdx[r] = cmap[*bj]; 2428 } 2429 ba++; 2430 bj++; 2431 } 2432 } 2433 2434 PetscCall(VecGetArrayWrite(v, &a)); 2435 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2436 for (r = 0; r < m; ++r) { 2437 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2438 a[r] = diagA[r]; 2439 if (idx) idx[r] = cstart + diagIdx[r]; 2440 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 if (idx) { 2443 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2444 idx[r] = cstart + diagIdx[r]; 2445 } else idx[r] = offdiagIdx[r]; 2446 } 2447 } else { 2448 a[r] = offdiagA[r]; 2449 if (idx) idx[r] = offdiagIdx[r]; 2450 } 2451 } 2452 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2453 PetscCall(VecRestoreArrayWrite(v, &a)); 2454 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2455 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2456 PetscCall(VecDestroy(&diagV)); 2457 PetscCall(VecDestroy(&offdiagV)); 2458 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2459 PetscFunctionReturn(PETSC_SUCCESS); 2460 } 2461 2462 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2463 { 2464 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2465 PetscInt m = A->rmap->n, n = A->cmap->n; 2466 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2467 PetscInt *cmap = mat->garray; 2468 PetscInt *diagIdx, *offdiagIdx; 2469 Vec diagV, offdiagV; 2470 PetscScalar *a, *diagA, *offdiagA; 2471 const PetscScalar *ba, *bav; 2472 PetscInt r, j, col, ncols, *bi, *bj; 2473 Mat B = mat->B; 2474 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2475 2476 PetscFunctionBegin; 2477 /* When a process holds entire A and other processes have no entry */ 2478 if (A->cmap->N == n) { 2479 PetscCall(VecGetArrayWrite(v, &diagA)); 2480 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2481 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2482 PetscCall(VecDestroy(&diagV)); 2483 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2484 PetscFunctionReturn(PETSC_SUCCESS); 2485 } else if (n == 0) { 2486 if (m) { 2487 PetscCall(VecGetArrayWrite(v, &a)); 2488 for (r = 0; r < m; r++) { 2489 a[r] = PETSC_MIN_REAL; 2490 if (idx) idx[r] = -1; 2491 } 2492 PetscCall(VecRestoreArrayWrite(v, &a)); 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2498 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 
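/* diagV receives the row maxima of the diagonal block and offdiagV those of the off-diagonal block
   (with implicit zeros from the column compression taken into account); the two are merged into v below */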
2499 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2500 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2501 2502 /* Get offdiagIdx[] for implicit 0.0 */ 2503 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2504 ba = bav; 2505 bi = b->i; 2506 bj = b->j; 2507 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2508 for (r = 0; r < m; r++) { 2509 ncols = bi[r + 1] - bi[r]; 2510 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2511 offdiagA[r] = *ba; 2512 offdiagIdx[r] = cmap[0]; 2513 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2514 offdiagA[r] = 0.0; 2515 2516 /* Find first hole in the cmap */ 2517 for (j = 0; j < ncols; j++) { 2518 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2519 if (col > j && j < cstart) { 2520 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2521 break; 2522 } else if (col > j + n && j >= cstart) { 2523 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2524 break; 2525 } 2526 } 2527 if (j == ncols && ncols < A->cmap->N - n) { 2528 /* a hole is outside compressed Bcols */ 2529 if (ncols == 0) { 2530 if (cstart) { 2531 offdiagIdx[r] = 0; 2532 } else offdiagIdx[r] = cend; 2533 } else { /* ncols > 0 */ 2534 offdiagIdx[r] = cmap[ncols - 1] + 1; 2535 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2536 } 2537 } 2538 } 2539 2540 for (j = 0; j < ncols; j++) { 2541 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2542 offdiagA[r] = *ba; 2543 offdiagIdx[r] = cmap[*bj]; 2544 } 2545 ba++; 2546 bj++; 2547 } 2548 } 2549 2550 PetscCall(VecGetArrayWrite(v, &a)); 2551 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2552 for (r = 0; r < m; ++r) { 2553 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2554 a[r] = diagA[r]; 2555 if (idx) idx[r] = cstart + diagIdx[r]; 2556 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2557 a[r] = diagA[r]; 2558 if (idx) { 2559 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2560 idx[r] = cstart + diagIdx[r]; 2561 } else idx[r] = offdiagIdx[r]; 2562 } 2563 } else { 2564 a[r] = offdiagA[r]; 2565 if (idx) idx[r] = offdiagIdx[r]; 2566 } 2567 } 2568 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2569 PetscCall(VecRestoreArrayWrite(v, &a)); 2570 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2571 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2572 PetscCall(VecDestroy(&diagV)); 2573 PetscCall(VecDestroy(&offdiagV)); 2574 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2575 PetscFunctionReturn(PETSC_SUCCESS); 2576 } 2577 2578 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2579 { 2580 Mat *dummy; 2581 2582 PetscFunctionBegin; 2583 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2584 *newmat = *dummy; 2585 PetscCall(PetscFree(dummy)); 2586 PetscFunctionReturn(PETSC_SUCCESS); 2587 } 2588 2589 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2590 { 2591 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2592 2593 PetscFunctionBegin; 2594 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2595 A->factorerrortype = a->A->factorerrortype; 2596 PetscFunctionReturn(PETSC_SUCCESS); 2597 } 2598 2599 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2600 { 2601 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2602 2603 PetscFunctionBegin; 2604 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ 
is not allowed"); 2605 PetscCall(MatSetRandom(aij->A, rctx)); 2606 if (x->assembled) { 2607 PetscCall(MatSetRandom(aij->B, rctx)); 2608 } else { 2609 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2610 } 2611 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2612 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2613 PetscFunctionReturn(PETSC_SUCCESS); 2614 } 2615 2616 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2617 { 2618 PetscFunctionBegin; 2619 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2620 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2621 PetscFunctionReturn(PETSC_SUCCESS); 2622 } 2623 2624 /*@ 2625 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2626 2627 Not collective 2628 2629 Input Parameter: 2630 . A - the matrix 2631 2632 Output Parameter: 2633 . nz - the number of nonzeros 2634 2635 Level: advanced 2636 2637 .seealso: `MATMPIAIJ`, `Mat` 2638 @*/ 2639 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2640 { 2641 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2642 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2643 2644 PetscFunctionBegin; 2645 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2646 PetscFunctionReturn(PETSC_SUCCESS); 2647 } 2648 2649 /*@ 2650 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2651 2652 Collective 2653 2654 Input Parameters: 2655 + A - the matrix 2656 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2657 2658 Level: advanced 2659 2660 @*/ 2661 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2662 { 2663 PetscFunctionBegin; 2664 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2665 PetscFunctionReturn(PETSC_SUCCESS); 2666 } 2667 2668 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2669 { 2670 PetscBool sc = PETSC_FALSE, flg; 2671 2672 PetscFunctionBegin; 2673 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2674 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2675 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2676 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2677 PetscOptionsHeadEnd(); 2678 PetscFunctionReturn(PETSC_SUCCESS); 2679 } 2680 2681 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2682 { 2683 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2684 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2685 2686 PetscFunctionBegin; 2687 if (!Y->preallocated) { 2688 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2689 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2690 PetscInt nonew = aij->nonew; 2691 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2692 aij->nonew = nonew; 2693 } 2694 PetscCall(MatShift_Basic(Y, a)); 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2699 { 2700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2704 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2705 if (d) { 2706 PetscInt rstart; 2707 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2708 *d += rstart; 2709 } 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2714 { 2715 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2716 2717 PetscFunctionBegin; 2718 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2719 PetscFunctionReturn(PETSC_SUCCESS); 2720 } 2721 2722 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2723 { 2724 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2725 2726 PetscFunctionBegin; 2727 PetscCall(MatEliminateZeros(a->A)); 2728 PetscCall(MatEliminateZeros(a->B)); 2729 PetscFunctionReturn(PETSC_SUCCESS); 2730 } 2731 2732 /* -------------------------------------------------------------------*/ 2733 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2734 MatGetRow_MPIAIJ, 2735 MatRestoreRow_MPIAIJ, 2736 MatMult_MPIAIJ, 2737 /* 4*/ MatMultAdd_MPIAIJ, 2738 MatMultTranspose_MPIAIJ, 2739 MatMultTransposeAdd_MPIAIJ, 2740 NULL, 2741 NULL, 2742 NULL, 2743 /*10*/ NULL, 2744 NULL, 2745 NULL, 2746 MatSOR_MPIAIJ, 2747 MatTranspose_MPIAIJ, 2748 /*15*/ MatGetInfo_MPIAIJ, 2749 MatEqual_MPIAIJ, 2750 MatGetDiagonal_MPIAIJ, 2751 MatDiagonalScale_MPIAIJ, 2752 MatNorm_MPIAIJ, 2753 /*20*/ MatAssemblyBegin_MPIAIJ, 2754 MatAssemblyEnd_MPIAIJ, 2755 MatSetOption_MPIAIJ, 2756 MatZeroEntries_MPIAIJ, 2757 /*24*/ MatZeroRows_MPIAIJ, 2758 NULL, 2759 NULL, 2760 NULL, 2761 NULL, 2762 /*29*/ MatSetUp_MPI_Hash, 2763 NULL, 2764 NULL, 2765 MatGetDiagonalBlock_MPIAIJ, 2766 NULL, 2767 /*34*/ MatDuplicate_MPIAIJ, 2768 NULL, 2769 NULL, 2770 NULL, 2771 NULL, 2772 /*39*/ MatAXPY_MPIAIJ, 2773 MatCreateSubMatrices_MPIAIJ, 2774 MatIncreaseOverlap_MPIAIJ, 2775 MatGetValues_MPIAIJ, 2776 MatCopy_MPIAIJ, 2777 /*44*/ MatGetRowMax_MPIAIJ, 2778 MatScale_MPIAIJ, 2779 MatShift_MPIAIJ, 2780 MatDiagonalSet_MPIAIJ, 2781 MatZeroRowsColumns_MPIAIJ, 2782 /*49*/ MatSetRandom_MPIAIJ, 2783 MatGetRowIJ_MPIAIJ, 2784 MatRestoreRowIJ_MPIAIJ, 2785 NULL, 2786 NULL, 2787 /*54*/ MatFDColoringCreate_MPIXAIJ, 2788 NULL, 2789 MatSetUnfactored_MPIAIJ, 2790 MatPermute_MPIAIJ, 2791 NULL, 2792 /*59*/ MatCreateSubMatrix_MPIAIJ, 2793 MatDestroy_MPIAIJ, 2794 MatView_MPIAIJ, 2795 NULL, 2796 NULL, 2797 /*64*/ NULL, 2798 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2799 NULL, 2800 NULL, 2801 NULL, 2802 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2803 MatGetRowMinAbs_MPIAIJ, 2804 NULL, 2805 NULL, 2806 NULL, 2807 NULL, 2808 /*75*/ MatFDColoringApply_AIJ, 2809 MatSetFromOptions_MPIAIJ, 2810 NULL, 2811 NULL, 2812 MatFindZeroDiagonals_MPIAIJ, 2813 /*80*/ NULL, 2814 NULL, 2815 NULL, 2816 /*83*/ MatLoad_MPIAIJ, 2817 MatIsSymmetric_MPIAIJ, 2818 NULL, 2819 NULL, 2820 NULL, 2821 NULL, 2822 /*89*/ NULL, 2823 NULL, 2824 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2825 NULL, 2826 NULL, 2827 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2828 NULL, 2829 NULL, 2830 NULL, 2831 MatBindToCPU_MPIAIJ, 2832 /*99*/ 
MatProductSetFromOptions_MPIAIJ, 2833 NULL, 2834 NULL, 2835 MatConjugate_MPIAIJ, 2836 NULL, 2837 /*104*/ MatSetValuesRow_MPIAIJ, 2838 MatRealPart_MPIAIJ, 2839 MatImaginaryPart_MPIAIJ, 2840 NULL, 2841 NULL, 2842 /*109*/ NULL, 2843 NULL, 2844 MatGetRowMin_MPIAIJ, 2845 NULL, 2846 MatMissingDiagonal_MPIAIJ, 2847 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2848 NULL, 2849 MatGetGhosts_MPIAIJ, 2850 NULL, 2851 NULL, 2852 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2853 NULL, 2854 NULL, 2855 NULL, 2856 MatGetMultiProcBlock_MPIAIJ, 2857 /*124*/ MatFindNonzeroRows_MPIAIJ, 2858 MatGetColumnReductions_MPIAIJ, 2859 MatInvertBlockDiagonal_MPIAIJ, 2860 MatInvertVariableBlockDiagonal_MPIAIJ, 2861 MatCreateSubMatricesMPI_MPIAIJ, 2862 /*129*/ NULL, 2863 NULL, 2864 NULL, 2865 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2866 NULL, 2867 /*134*/ NULL, 2868 NULL, 2869 NULL, 2870 NULL, 2871 NULL, 2872 /*139*/ MatSetBlockSizes_MPIAIJ, 2873 NULL, 2874 NULL, 2875 MatFDColoringSetUp_MPIXAIJ, 2876 MatFindOffBlockDiagonalEntries_MPIAIJ, 2877 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2878 /*145*/ NULL, 2879 NULL, 2880 NULL, 2881 MatCreateGraph_Simple_AIJ, 2882 NULL, 2883 /*150*/ NULL, 2884 MatEliminateZeros_MPIAIJ}; 2885 2886 /* ----------------------------------------------------------------------------------------*/ 2887 2888 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2889 { 2890 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2891 2892 PetscFunctionBegin; 2893 PetscCall(MatStoreValues(aij->A)); 2894 PetscCall(MatStoreValues(aij->B)); 2895 PetscFunctionReturn(PETSC_SUCCESS); 2896 } 2897 2898 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2899 { 2900 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2901 2902 PetscFunctionBegin; 2903 PetscCall(MatRetrieveValues(aij->A)); 2904 PetscCall(MatRetrieveValues(aij->B)); 2905 PetscFunctionReturn(PETSC_SUCCESS); 2906 } 2907 2908 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2909 { 2910 Mat_MPIAIJ *b; 2911 PetscMPIInt size; 2912 2913 PetscFunctionBegin; 2914 PetscCall(PetscLayoutSetUp(B->rmap)); 2915 PetscCall(PetscLayoutSetUp(B->cmap)); 2916 b = (Mat_MPIAIJ *)B->data; 2917 2918 #if defined(PETSC_USE_CTABLE) 2919 PetscCall(PetscHMapIDestroy(&b->colmap)); 2920 #else 2921 PetscCall(PetscFree(b->colmap)); 2922 #endif 2923 PetscCall(PetscFree(b->garray)); 2924 PetscCall(VecDestroy(&b->lvec)); 2925 PetscCall(VecScatterDestroy(&b->Mvctx)); 2926 2927 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2928 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2929 PetscCall(MatDestroy(&b->B)); 2930 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2931 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2932 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2933 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2934 2935 if (!B->preallocated) { 2936 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2937 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2938 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2939 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2940 } 2941 2942 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2943 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2944 B->preallocated = PETSC_TRUE; 2945 B->was_assembled = PETSC_FALSE; 2946 B->assembled = PETSC_FALSE; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2951 { 2952 Mat_MPIAIJ *b; 2953 2954 PetscFunctionBegin; 2955 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2956 PetscCall(PetscLayoutSetUp(B->rmap)); 2957 PetscCall(PetscLayoutSetUp(B->cmap)); 2958 b = (Mat_MPIAIJ *)B->data; 2959 2960 #if defined(PETSC_USE_CTABLE) 2961 PetscCall(PetscHMapIDestroy(&b->colmap)); 2962 #else 2963 PetscCall(PetscFree(b->colmap)); 2964 #endif 2965 PetscCall(PetscFree(b->garray)); 2966 PetscCall(VecDestroy(&b->lvec)); 2967 PetscCall(VecScatterDestroy(&b->Mvctx)); 2968 2969 PetscCall(MatResetPreallocation(b->A)); 2970 PetscCall(MatResetPreallocation(b->B)); 2971 B->preallocated = PETSC_TRUE; 2972 B->was_assembled = PETSC_FALSE; 2973 B->assembled = PETSC_FALSE; 2974 PetscFunctionReturn(PETSC_SUCCESS); 2975 } 2976 2977 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2978 { 2979 Mat mat; 2980 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2981 2982 PetscFunctionBegin; 2983 *newmat = NULL; 2984 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2985 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2986 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2987 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2988 a = (Mat_MPIAIJ *)mat->data; 2989 2990 mat->factortype = matin->factortype; 2991 mat->assembled = matin->assembled; 2992 mat->insertmode = NOT_SET_VALUES; 2993 mat->preallocated = matin->preallocated; 2994 2995 a->size = oldmat->size; 2996 a->rank = oldmat->rank; 2997 a->donotstash = oldmat->donotstash; 2998 a->roworiented = oldmat->roworiented; 2999 a->rowindices = NULL; 3000 a->rowvalues = NULL; 3001 a->getrowactive = PETSC_FALSE; 3002 3003 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3004 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3005 3006 if (oldmat->colmap) { 3007 #if defined(PETSC_USE_CTABLE) 3008 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3009 #else 3010 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3011 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3012 #endif 3013 } else a->colmap = NULL; 3014 if (oldmat->garray) { 3015 PetscInt len; 3016 len = oldmat->B->cmap->n; 3017 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3018 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3019 } else a->garray = NULL; 3020 3021 /* It may happen MatDuplicate is called with a non-assembled matrix 3022 In fact, MatDuplicate only requires the matrix to be preallocated 3023 This may happen inside a DMCreateMatrix_Shell */ 3024 if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); } 3025 if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); } 3026 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3027 
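/* the off-diagonal block is duplicated the same way; the composed function list is copied afterwards so
   type-specific behavior attached to the original matrix is preserved */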
PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3028 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3029 *newmat = mat; 3030 PetscFunctionReturn(PETSC_SUCCESS); 3031 } 3032 3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3034 { 3035 PetscBool isbinary, ishdf5; 3036 3037 PetscFunctionBegin; 3038 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3039 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3040 /* force binary viewer to load .info file if it has not yet done so */ 3041 PetscCall(PetscViewerSetUp(viewer)); 3042 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3043 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3044 if (isbinary) { 3045 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3046 } else if (ishdf5) { 3047 #if defined(PETSC_HAVE_HDF5) 3048 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3049 #else 3050 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3051 #endif 3052 } else { 3053 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3054 } 3055 PetscFunctionReturn(PETSC_SUCCESS); 3056 } 3057 3058 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3059 { 3060 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3061 PetscInt *rowidxs, *colidxs; 3062 PetscScalar *matvals; 3063 3064 PetscFunctionBegin; 3065 PetscCall(PetscViewerSetUp(viewer)); 3066 3067 /* read in matrix header */ 3068 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3069 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3070 M = header[1]; 3071 N = header[2]; 3072 nz = header[3]; 3073 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3074 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3075 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3076 3077 /* set block sizes from the viewer's .info file */ 3078 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3079 /* set global sizes if not set already */ 3080 if (mat->rmap->N < 0) mat->rmap->N = M; 3081 if (mat->cmap->N < 0) mat->cmap->N = N; 3082 PetscCall(PetscLayoutSetUp(mat->rmap)); 3083 PetscCall(PetscLayoutSetUp(mat->cmap)); 3084 3085 /* check if the matrix sizes are correct */ 3086 PetscCall(MatGetSize(mat, &rows, &cols)); 3087 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3088 3089 /* read in row lengths and build row indices */ 3090 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3091 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3092 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3093 rowidxs[0] = 0; 3094 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3095 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, 
PetscObjectComm((PetscObject)viewer))); 3096 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3097 /* read in column indices and matrix values */ 3098 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3100 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3101 /* store matrix indices and values */ 3102 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3103 PetscCall(PetscFree(rowidxs)); 3104 PetscCall(PetscFree2(colidxs, matvals)); 3105 PetscFunctionReturn(PETSC_SUCCESS); 3106 } 3107 3108 /* Not scalable because of ISAllGather() unless getting all columns. */ 3109 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3110 { 3111 IS iscol_local; 3112 PetscBool isstride; 3113 PetscMPIInt lisstride = 0, gisstride; 3114 3115 PetscFunctionBegin; 3116 /* check if we are grabbing all columns*/ 3117 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3118 3119 if (isstride) { 3120 PetscInt start, len, mstart, mlen; 3121 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3122 PetscCall(ISGetLocalSize(iscol, &len)); 3123 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3124 if (mstart == start && mlen - mstart == len) lisstride = 1; 3125 } 3126 3127 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3128 if (gisstride) { 3129 PetscInt N; 3130 PetscCall(MatGetSize(mat, NULL, &N)); 3131 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3132 PetscCall(ISSetIdentity(iscol_local)); 3133 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3134 } else { 3135 PetscInt cbs; 3136 PetscCall(ISGetBlockSize(iscol, &cbs)); 3137 PetscCall(ISAllGather(iscol, &iscol_local)); 3138 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3139 } 3140 3141 *isseq = iscol_local; 3142 PetscFunctionReturn(PETSC_SUCCESS); 3143 } 3144 3145 /* 3146 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3147 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3148 3149 Input Parameters: 3150 mat - matrix 3151 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3152 i.e., mat->rstart <= isrow[i] < mat->rend 3153 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3154 i.e., mat->cstart <= iscol[i] < mat->cend 3155 Output Parameter: 3156 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3157 iscol_o - sequential column index set for retrieving mat->B 3158 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3159 */ 3160 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3161 { 3162 Vec x, cmap; 3163 const PetscInt *is_idx; 3164 PetscScalar *xarray, *cmaparray; 3165 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3167 Mat B = a->B; 3168 Vec lvec = a->lvec, lcmap; 3169 PetscInt i, cstart, cend, Bn = B->cmap->N; 3170 MPI_Comm comm; 3171 VecScatter Mvctx = a->Mvctx; 3172 3173 PetscFunctionBegin; 3174 
PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3175 PetscCall(ISGetLocalSize(iscol, &ncols)); 3176 3177 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3178 PetscCall(MatCreateVecs(mat, &x, NULL)); 3179 PetscCall(VecSet(x, -1.0)); 3180 PetscCall(VecDuplicate(x, &cmap)); 3181 PetscCall(VecSet(cmap, -1.0)); 3182 3183 /* Get start indices */ 3184 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3185 isstart -= ncols; 3186 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3187 3188 PetscCall(ISGetIndices(iscol, &is_idx)); 3189 PetscCall(VecGetArray(x, &xarray)); 3190 PetscCall(VecGetArray(cmap, &cmaparray)); 3191 PetscCall(PetscMalloc1(ncols, &idx)); 3192 for (i = 0; i < ncols; i++) { 3193 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3194 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3195 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3196 } 3197 PetscCall(VecRestoreArray(x, &xarray)); 3198 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3199 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3200 3201 /* Get iscol_d */ 3202 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3203 PetscCall(ISGetBlockSize(iscol, &i)); 3204 PetscCall(ISSetBlockSize(*iscol_d, i)); 3205 3206 /* Get isrow_d */ 3207 PetscCall(ISGetLocalSize(isrow, &m)); 3208 rstart = mat->rmap->rstart; 3209 PetscCall(PetscMalloc1(m, &idx)); 3210 PetscCall(ISGetIndices(isrow, &is_idx)); 3211 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3212 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3213 3214 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3215 PetscCall(ISGetBlockSize(isrow, &i)); 3216 PetscCall(ISSetBlockSize(*isrow_d, i)); 3217 3218 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3219 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3220 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 3222 PetscCall(VecDuplicate(lvec, &lcmap)); 3223 3224 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3225 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 3227 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3228 /* off-process column indices */ 3229 count = 0; 3230 PetscCall(PetscMalloc1(Bn, &idx)); 3231 PetscCall(PetscMalloc1(Bn, &cmap1)); 3232 3233 PetscCall(VecGetArray(lvec, &xarray)); 3234 PetscCall(VecGetArray(lcmap, &cmaparray)); 3235 for (i = 0; i < Bn; i++) { 3236 if (PetscRealPart(xarray[i]) > -1.0) { 3237 idx[count] = i; /* local column index in off-diagonal part B */ 3238 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3239 count++; 3240 } 3241 } 3242 PetscCall(VecRestoreArray(lvec, &xarray)); 3243 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3244 3245 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3246 /* cannot ensure iscol_o has same blocksize as iscol! 
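     (iscol_o keeps only the off-process columns actually selected on this process, which need not form
     complete blocks)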
*/ 3247 3248 PetscCall(PetscFree(idx)); 3249 *garray = cmap1; 3250 3251 PetscCall(VecDestroy(&x)); 3252 PetscCall(VecDestroy(&cmap)); 3253 PetscCall(VecDestroy(&lcmap)); 3254 PetscFunctionReturn(PETSC_SUCCESS); 3255 } 3256 3257 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3258 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3259 { 3260 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3261 Mat M = NULL; 3262 MPI_Comm comm; 3263 IS iscol_d, isrow_d, iscol_o; 3264 Mat Asub = NULL, Bsub = NULL; 3265 PetscInt n; 3266 3267 PetscFunctionBegin; 3268 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3269 3270 if (call == MAT_REUSE_MATRIX) { 3271 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3272 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3273 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3274 3275 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3276 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3277 3278 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3279 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3280 3281 /* Update diagonal and off-diagonal portions of submat */ 3282 asub = (Mat_MPIAIJ *)(*submat)->data; 3283 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3284 PetscCall(ISGetLocalSize(iscol_o, &n)); 3285 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3286 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3287 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3288 3289 } else { /* call == MAT_INITIAL_MATRIX) */ 3290 const PetscInt *garray; 3291 PetscInt BsubN; 3292 3293 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3294 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3295 3296 /* Create local submatrices Asub and Bsub */ 3297 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3299 3300 /* Create submatrix M */ 3301 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3302 3303 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3304 asub = (Mat_MPIAIJ *)M->data; 3305 3306 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3307 n = asub->B->cmap->N; 3308 if (BsubN > n) { 3309 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3310 const PetscInt *idx; 3311 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3312 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3313 3314 PetscCall(PetscMalloc1(n, &idx_new)); 3315 j = 0; 3316 PetscCall(ISGetIndices(iscol_o, &idx)); 3317 for (i = 0; i < n; i++) { 3318 if (j >= BsubN) break; 3319 while (subgarray[i] > garray[j]) j++; 3320 3321 if (subgarray[i] == garray[j]) { 3322 idx_new[i] = idx[j++]; 3323 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3324 } 3325 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3326 3327 PetscCall(ISDestroy(&iscol_o)); 3328 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3329 3330 } else if (BsubN < n) { 3331 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3332 } 3333 3334 PetscCall(PetscFree(garray)); 3335 *submat = M; 3336 3337 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3338 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3339 PetscCall(ISDestroy(&isrow_d)); 3340 3341 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3342 PetscCall(ISDestroy(&iscol_d)); 3343 3344 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3345 PetscCall(ISDestroy(&iscol_o)); 3346 } 3347 PetscFunctionReturn(PETSC_SUCCESS); 3348 } 3349 3350 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3351 { 3352 IS iscol_local = NULL, isrow_d; 3353 PetscInt csize; 3354 PetscInt n, i, j, start, end; 3355 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3356 MPI_Comm comm; 3357 3358 PetscFunctionBegin; 3359 /* If isrow has same processor distribution as mat, 3360 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3361 if (call == MAT_REUSE_MATRIX) { 3362 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3363 if (isrow_d) { 3364 sameRowDist = PETSC_TRUE; 3365 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3366 } else { 3367 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3368 if (iscol_local) { 3369 sameRowDist = PETSC_TRUE; 3370 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3371 } 3372 } 3373 } else { 3374 /* Check if isrow has same processor distribution as mat */ 3375 sameDist[0] = PETSC_FALSE; 3376 
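    /* isrow is considered to have the same distribution as mat when every local index of isrow lies in this
       process's row ownership range [start, end); an empty local isrow also qualifies */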
PetscCall(ISGetLocalSize(isrow, &n)); 3377 if (!n) { 3378 sameDist[0] = PETSC_TRUE; 3379 } else { 3380 PetscCall(ISGetMinMax(isrow, &i, &j)); 3381 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3382 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3383 } 3384 3385 /* Check if iscol has same processor distribution as mat */ 3386 sameDist[1] = PETSC_FALSE; 3387 PetscCall(ISGetLocalSize(iscol, &n)); 3388 if (!n) { 3389 sameDist[1] = PETSC_TRUE; 3390 } else { 3391 PetscCall(ISGetMinMax(iscol, &i, &j)); 3392 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3393 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3394 } 3395 3396 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3397 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3398 sameRowDist = tsameDist[0]; 3399 } 3400 3401 if (sameRowDist) { 3402 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3403 /* isrow and iscol have same processor distribution as mat */ 3404 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3405 PetscFunctionReturn(PETSC_SUCCESS); 3406 } else { /* sameRowDist */ 3407 /* isrow has same processor distribution as mat */ 3408 if (call == MAT_INITIAL_MATRIX) { 3409 PetscBool sorted; 3410 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3411 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3412 PetscCall(ISGetSize(iscol, &i)); 3413 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3414 3415 PetscCall(ISSorted(iscol_local, &sorted)); 3416 if (sorted) { 3417 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3418 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3419 PetscFunctionReturn(PETSC_SUCCESS); 3420 } 3421 } else { /* call == MAT_REUSE_MATRIX */ 3422 IS iscol_sub; 3423 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3424 if (iscol_sub) { 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } 3428 } 3429 } 3430 } 3431 3432 /* General case: iscol -> iscol_local which has global size of iscol */ 3433 if (call == MAT_REUSE_MATRIX) { 3434 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3435 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3436 } else { 3437 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3438 } 3439 3440 PetscCall(ISGetLocalSize(iscol, &csize)); 3441 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3442 3443 if (call == MAT_INITIAL_MATRIX) { 3444 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3445 PetscCall(ISDestroy(&iscol_local)); 3446 } 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 3450 /*@C 3451 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3452 and "off-diagonal" part of the matrix in CSR format. 3453 3454 Collective 3455 3456 Input Parameters: 3457 + comm - MPI communicator 3458 . A - "diagonal" portion of matrix 3459 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3460 - garray - global index of B columns 3461 3462 Output Parameter: 3463 . mat - the matrix, with input A as its local diagonal matrix 3464 Level: advanced 3465 3466 Notes: 3467 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3468 3469 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3470 3471 .seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3472 @*/ 3473 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3474 { 3475 Mat_MPIAIJ *maij; 3476 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3477 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3478 const PetscScalar *oa; 3479 Mat Bnew; 3480 PetscInt m, n, N; 3481 MatType mpi_mat_type; 3482 3483 PetscFunctionBegin; 3484 PetscCall(MatCreate(comm, mat)); 3485 PetscCall(MatGetSize(A, &m, &n)); 3486 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3487 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3488 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3489 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3490 3491 /* Get global columns of mat */ 3492 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3493 3494 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3495 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3496 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3497 PetscCall(MatSetType(*mat, mpi_mat_type)); 3498 3499 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3500 maij = (Mat_MPIAIJ *)(*mat)->data; 3501 3502 (*mat)->preallocated = PETSC_TRUE; 3503 3504 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3505 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3506 3507 /* Set A as diagonal portion of *mat */ 3508 maij->A = A; 3509 3510 nz = oi[m]; 3511 for (i = 0; i < nz; i++) { 3512 col = oj[i]; 3513 oj[i] = garray[col]; 3514 } 3515 3516 /* Set Bnew as off-diagonal portion of *mat */ 3517 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3518 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3519 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3520 bnew = (Mat_SeqAIJ *)Bnew->data; 3521 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3522 maij->B = Bnew; 3523 3524 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3525 3526 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3527 b->free_a = PETSC_FALSE; 3528 b->free_ij = PETSC_FALSE; 3529 PetscCall(MatDestroy(&B)); 3530 3531 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3532 bnew->free_a = PETSC_TRUE; 3533 bnew->free_ij = PETSC_TRUE; 3534 3535 /* condense columns of maij->B */ 3536 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3537 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3538 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3539 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3540 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3541 PetscFunctionReturn(PETSC_SUCCESS); 3542 } 3543 3544 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3545 3546 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3547 { 3548 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3549 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3550 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3551 Mat M, Msub, B = a->B; 3552 MatScalar *aa; 3553 Mat_SeqAIJ *aij; 3554 PetscInt *garray = a->garray, *colsub, Ncols; 3555 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3556 IS iscol_sub, iscmap; 3557 const PetscInt *is_idx, *cmap; 3558 PetscBool allcolumns = PETSC_FALSE; 3559 MPI_Comm comm; 3560 3561 PetscFunctionBegin; 3562 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3563 if (call == MAT_REUSE_MATRIX) { 3564 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3565 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3566 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3567 3568 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3569 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3570 3571 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3572 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3573 3574 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, 
&iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3575 3576 } else { /* call == MAT_INITIAL_MATRIX) */ 3577 PetscBool flg; 3578 3579 PetscCall(ISGetLocalSize(iscol, &n)); 3580 PetscCall(ISGetSize(iscol, &Ncols)); 3581 3582 /* (1) iscol -> nonscalable iscol_local */ 3583 /* Check for special case: each processor gets entire matrix columns */ 3584 PetscCall(ISIdentity(iscol_local, &flg)); 3585 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3586 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3587 if (allcolumns) { 3588 iscol_sub = iscol_local; 3589 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3590 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3591 3592 } else { 3593 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3594 PetscInt *idx, *cmap1, k; 3595 PetscCall(PetscMalloc1(Ncols, &idx)); 3596 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3597 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3598 count = 0; 3599 k = 0; 3600 for (i = 0; i < Ncols; i++) { 3601 j = is_idx[i]; 3602 if (j >= cstart && j < cend) { 3603 /* diagonal part of mat */ 3604 idx[count] = j; 3605 cmap1[count++] = i; /* column index in submat */ 3606 } else if (Bn) { 3607 /* off-diagonal part of mat */ 3608 if (j == garray[k]) { 3609 idx[count] = j; 3610 cmap1[count++] = i; /* column index in submat */ 3611 } else if (j > garray[k]) { 3612 while (j > garray[k] && k < Bn - 1) k++; 3613 if (j == garray[k]) { 3614 idx[count] = j; 3615 cmap1[count++] = i; /* column index in submat */ 3616 } 3617 } 3618 } 3619 } 3620 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3621 3622 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3623 PetscCall(ISGetBlockSize(iscol, &cbs)); 3624 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3625 3626 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3627 } 3628 3629 /* (3) Create sequential Msub */ 3630 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3631 } 3632 3633 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3634 aij = (Mat_SeqAIJ *)(Msub)->data; 3635 ii = aij->i; 3636 PetscCall(ISGetIndices(iscmap, &cmap)); 3637 3638 /* 3639 m - number of local rows 3640 Ncols - number of columns (same on all processors) 3641 rstart - first row in new global matrix generated 3642 */ 3643 PetscCall(MatGetSize(Msub, &m, NULL)); 3644 3645 if (call == MAT_INITIAL_MATRIX) { 3646 /* (4) Create parallel newmat */ 3647 PetscMPIInt rank, size; 3648 PetscInt csize; 3649 3650 PetscCallMPI(MPI_Comm_size(comm, &size)); 3651 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3652 3653 /* 3654 Determine the number of non-zeros in the diagonal and off-diagonal 3655 portions of the matrix in order to do correct preallocation 3656 */ 3657 3658 /* first get start and end of "diagonal" columns */ 3659 PetscCall(ISGetLocalSize(iscol, &csize)); 3660 if (csize == PETSC_DECIDE) { 3661 PetscCall(ISGetSize(isrow, &mglobal)); 3662 if (mglobal == Ncols) { /* square matrix */ 3663 nlocal = m; 3664 } else { 3665 nlocal = Ncols / size + ((Ncols % size) > rank); 3666 } 3667 } else { 3668 nlocal = csize; 3669 } 3670 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3671 rstart = rend - nlocal; 3672 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column 
sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3673 3674 /* next, compute all the lengths */ 3675 jj = aij->j; 3676 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3677 olens = dlens + m; 3678 for (i = 0; i < m; i++) { 3679 jend = ii[i + 1] - ii[i]; 3680 olen = 0; 3681 dlen = 0; 3682 for (j = 0; j < jend; j++) { 3683 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3684 else dlen++; 3685 jj++; 3686 } 3687 olens[i] = olen; 3688 dlens[i] = dlen; 3689 } 3690 3691 PetscCall(ISGetBlockSize(isrow, &bs)); 3692 PetscCall(ISGetBlockSize(iscol, &cbs)); 3693 3694 PetscCall(MatCreate(comm, &M)); 3695 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3696 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3697 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3698 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3699 PetscCall(PetscFree(dlens)); 3700 3701 } else { /* call == MAT_REUSE_MATRIX */ 3702 M = *newmat; 3703 PetscCall(MatGetLocalSize(M, &i, NULL)); 3704 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3705 PetscCall(MatZeroEntries(M)); 3706 /* 3707 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3708 rather than the slower MatSetValues(). 3709 */ 3710 M->was_assembled = PETSC_TRUE; 3711 M->assembled = PETSC_FALSE; 3712 } 3713 3714 /* (5) Set values of Msub to *newmat */ 3715 PetscCall(PetscMalloc1(count, &colsub)); 3716 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3717 3718 jj = aij->j; 3719 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3720 for (i = 0; i < m; i++) { 3721 row = rstart + i; 3722 nz = ii[i + 1] - ii[i]; 3723 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3724 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3725 jj += nz; 3726 aa += nz; 3727 } 3728 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3729 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3730 3731 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3732 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3733 3734 PetscCall(PetscFree(colsub)); 3735 3736 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3737 if (call == MAT_INITIAL_MATRIX) { 3738 *newmat = M; 3739 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3740 PetscCall(MatDestroy(&Msub)); 3741 3742 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3743 PetscCall(ISDestroy(&iscol_sub)); 3744 3745 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3746 PetscCall(ISDestroy(&iscmap)); 3747 3748 if (iscol_local) { 3749 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3750 PetscCall(ISDestroy(&iscol_local)); 3751 } 3752 } 3753 PetscFunctionReturn(PETSC_SUCCESS); 3754 } 3755 3756 /* 3757 Not great since it makes two copies of the submatrix, first an SeqAIJ 3758 in local and then by concatenating the local matrices the end result. 3759 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3760 3761 This requires a sequential iscol with all indices. 
3762 */ 3763 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3764 { 3765 PetscMPIInt rank, size; 3766 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3767 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3768 Mat M, Mreuse; 3769 MatScalar *aa, *vwork; 3770 MPI_Comm comm; 3771 Mat_SeqAIJ *aij; 3772 PetscBool colflag, allcolumns = PETSC_FALSE; 3773 3774 PetscFunctionBegin; 3775 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3776 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3777 PetscCallMPI(MPI_Comm_size(comm, &size)); 3778 3779 /* Check for special case: each processor gets entire matrix columns */ 3780 PetscCall(ISIdentity(iscol, &colflag)); 3781 PetscCall(ISGetLocalSize(iscol, &n)); 3782 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3783 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3784 3785 if (call == MAT_REUSE_MATRIX) { 3786 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3787 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3788 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3789 } else { 3790 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3791 } 3792 3793 /* 3794 m - number of local rows 3795 n - number of columns (same on all processors) 3796 rstart - first row in new global matrix generated 3797 */ 3798 PetscCall(MatGetSize(Mreuse, &m, &n)); 3799 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3800 if (call == MAT_INITIAL_MATRIX) { 3801 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3802 ii = aij->i; 3803 jj = aij->j; 3804 3805 /* 3806 Determine the number of non-zeros in the diagonal and off-diagonal 3807 portions of the matrix in order to do correct preallocation 3808 */ 3809 3810 /* first get start and end of "diagonal" columns */ 3811 if (csize == PETSC_DECIDE) { 3812 PetscCall(ISGetSize(isrow, &mglobal)); 3813 if (mglobal == n) { /* square matrix */ 3814 nlocal = m; 3815 } else { 3816 nlocal = n / size + ((n % size) > rank); 3817 } 3818 } else { 3819 nlocal = csize; 3820 } 3821 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3822 rstart = rend - nlocal; 3823 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3824 3825 /* next, compute all the lengths */ 3826 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3827 olens = dlens + m; 3828 for (i = 0; i < m; i++) { 3829 jend = ii[i + 1] - ii[i]; 3830 olen = 0; 3831 dlen = 0; 3832 for (j = 0; j < jend; j++) { 3833 if (*jj < rstart || *jj >= rend) olen++; 3834 else dlen++; 3835 jj++; 3836 } 3837 olens[i] = olen; 3838 dlens[i] = dlen; 3839 } 3840 PetscCall(MatCreate(comm, &M)); 3841 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3842 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3843 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3844 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3845 PetscCall(PetscFree(dlens)); 3846 } else { 3847 PetscInt ml, nl; 3848 3849 M = *newmat; 3850 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3851 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3852 PetscCall(MatZeroEntries(M)); 3853 /* 3854 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3855 rather than the slower MatSetValues(). 3856 */ 3857 M->was_assembled = PETSC_TRUE; 3858 M->assembled = PETSC_FALSE; 3859 } 3860 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3861 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3862 ii = aij->i; 3863 jj = aij->j; 3864 3865 /* trigger copy to CPU if needed */ 3866 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3867 for (i = 0; i < m; i++) { 3868 row = rstart + i; 3869 nz = ii[i + 1] - ii[i]; 3870 cwork = jj; 3871 jj += nz; 3872 vwork = aa; 3873 aa += nz; 3874 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3875 } 3876 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3877 3878 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3879 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3880 *newmat = M; 3881 3882 /* save submatrix used in processor for next request */ 3883 if (call == MAT_INITIAL_MATRIX) { 3884 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3885 PetscCall(MatDestroy(&Mreuse)); 3886 } 3887 PetscFunctionReturn(PETSC_SUCCESS); 3888 } 3889 3890 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3891 { 3892 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3893 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3894 const PetscInt *JJ; 3895 PetscBool nooffprocentries; 3896 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3897 3898 PetscFunctionBegin; 3899 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3900 3901 PetscCall(PetscLayoutSetUp(B->rmap)); 3902 PetscCall(PetscLayoutSetUp(B->cmap)); 3903 m = B->rmap->n; 3904 cstart = B->cmap->rstart; 3905 cend = B->cmap->rend; 3906 rstart = B->rmap->rstart; 3907 3908 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3909 3910 if (PetscDefined(USE_DEBUG)) { 3911 for (i = 0; i < m; i++) { 3912 nnz = Ii[i + 1] - Ii[i]; 3913 JJ = J + Ii[i]; 3914 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3915 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3916 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3917 } 3918 } 3919 3920 for (i = 0; i < m; i++) { 3921 nnz = Ii[i + 1] - Ii[i]; 3922 JJ = J + Ii[i]; 3923 nnz_max = PetscMax(nnz_max, nnz); 3924 d = 0; 3925 for (j = 0; j < nnz; j++) { 3926 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3927 } 3928 d_nnz[i] = d; 3929 o_nnz[i] = nnz - d; 3930 } 3931 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3932 PetscCall(PetscFree2(d_nnz, o_nnz)); 3933 3934 for (i = 0; i < m; i++) { 3935 ii = i + rstart; 3936 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3937 } 3938 nooffprocentries = B->nooffprocentries; 3939 B->nooffprocentries = PETSC_TRUE; 3940 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3941 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3942 B->nooffprocentries = nooffprocentries; 3943 3944 /* count number of entries below block diagonal */ 3945 PetscCall(PetscFree(Aij->ld)); 3946 PetscCall(PetscCalloc1(m, &ld)); 3947 Aij->ld = ld; 3948 for (i = 0; i < m; i++) { 3949 nnz = Ii[i + 1] - Ii[i]; 3950 j = 0; 3951 while (j < nnz && J[j] < cstart) j++; 3952 ld[i] = j; 3953 J += nnz; 3954 } 3955 3956 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3957 PetscFunctionReturn(PETSC_SUCCESS); 3958 } 3959 3960 /*@ 3961 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3962 (the default parallel PETSc format). 3963 3964 Collective 3965 3966 Input Parameters: 3967 + B - the matrix 3968 . i - the indices into j for the start of each local row (starts with zero) 3969 . j - the column indices for each local row (starts with zero) 3970 - v - optional values in the matrix 3971 3972 Level: developer 3973 3974 Notes: 3975 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3976 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3977 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3978 3979 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3980 3981 The format which is used for the sparse matrix input, is equivalent to a 3982 row-major ordering.. i.e for the following matrix, the input data expected is 3983 as shown 3984 3985 $ 1 0 0 3986 $ 2 0 3 P0 3987 $ ------- 3988 $ 4 5 6 P1 3989 $ 3990 $ Process0 [P0]: rows_owned=[0,1] 3991 $ i = {0,1,3} [size = nrow+1 = 2+1] 3992 $ j = {0,0,2} [size = 3] 3993 $ v = {1,2,3} [size = 3] 3994 $ 3995 $ Process1 [P1]: rows_owned=[2] 3996 $ i = {0,3} [size = nrow+1 = 1+1] 3997 $ j = {0,1,2} [size = 3] 3998 $ v = {4,5,6} [size = 3] 3999 4000 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4001 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4002 @*/ 4003 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4004 { 4005 PetscFunctionBegin; 4006 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4007 PetscFunctionReturn(PETSC_SUCCESS); 4008 } 4009 4010 /*@C 4011 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4012 (the default parallel PETSc format). For good matrix assembly performance 4013 the user should preallocate the matrix storage by setting the parameters 4014 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4015 performance can be increased by more than a factor of 50. 4016 4017 Collective 4018 4019 Input Parameters: 4020 + B - the matrix 4021 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4022 (same value is used for all local rows) 4023 . d_nnz - array containing the number of nonzeros in the various rows of the 4024 DIAGONAL portion of the local submatrix (possibly different for each row) 4025 or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure. 
          The size of this array is equal to the number of local rows, i.e. 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e. 'm'.

  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an m x n matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.

  If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices, e.g., proc1 will store [E] as a `MATSEQAIJ`
  matrix and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
  row of the local DIAGONAL and OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence preallocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and the i indices are offsets into the local j array.

  The format used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown below.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArrays()`.

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format. Only the numerical values are updated; the other arrays must be identical to those passed to `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m   - number of local rows (Cannot be `PETSC_DECIDE`)
. n   - This value should be the same as the local size used in creating the
        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
        calculated if N is given) For square matrices n is almost always m.
. M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4232 - v - matrix values 4233 4234 Level: intermediate 4235 4236 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4237 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4238 @*/ 4239 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4240 { 4241 PetscInt nnz, i; 4242 PetscBool nooffprocentries; 4243 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4244 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4245 PetscScalar *ad, *ao; 4246 PetscInt ldi, Iii, md; 4247 const PetscInt *Adi = Ad->i; 4248 PetscInt *ld = Aij->ld; 4249 4250 PetscFunctionBegin; 4251 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4252 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4253 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4254 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4255 4256 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4257 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4258 4259 for (i = 0; i < m; i++) { 4260 nnz = Ii[i + 1] - Ii[i]; 4261 Iii = Ii[i]; 4262 ldi = ld[i]; 4263 md = Adi[i + 1] - Adi[i]; 4264 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4265 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4266 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4267 ad += md; 4268 ao += nnz - md; 4269 } 4270 nooffprocentries = mat->nooffprocentries; 4271 mat->nooffprocentries = PETSC_TRUE; 4272 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4273 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4274 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4275 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4276 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4277 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4278 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4279 mat->nooffprocentries = nooffprocentries; 4280 PetscFunctionReturn(PETSC_SUCCESS); 4281 } 4282 4283 /*@ 4284 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4285 4286 Collective 4287 4288 Input Parameters: 4289 + mat - the matrix 4290 - v - matrix values, stored by row 4291 4292 Level: intermediate 4293 4294 Note: 4295 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4296 4297 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4298 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4299 @*/ 4300 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4301 { 4302 PetscInt nnz, i, m; 4303 PetscBool nooffprocentries; 4304 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4305 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4306 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4307 PetscScalar *ad, *ao; 4308 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4309 PetscInt ldi, Iii, md; 4310 PetscInt *ld = Aij->ld; 
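  /* v is assumed to hold each local row's nonzeros in the same order used by MatCreateMPIAIJWithArrays():
     for row i, first the ld[i] entries whose global columns precede the diagonal block, then the md
     diagonal-block entries, then the remaining off-diagonal entries; the copies below split each row of v
     into the diagonal (A) and off-diagonal (B) storage accordingly */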
4311 4312 PetscFunctionBegin; 4313 m = mat->rmap->n; 4314 4315 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4316 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4317 Iii = 0; 4318 for (i = 0; i < m; i++) { 4319 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4320 ldi = ld[i]; 4321 md = Adi[i + 1] - Adi[i]; 4322 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4323 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4324 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4325 ad += md; 4326 ao += nnz - md; 4327 Iii += nnz; 4328 } 4329 nooffprocentries = mat->nooffprocentries; 4330 mat->nooffprocentries = PETSC_TRUE; 4331 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4332 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4333 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4334 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4335 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4336 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4337 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4338 mat->nooffprocentries = nooffprocentries; 4339 PetscFunctionReturn(PETSC_SUCCESS); 4340 } 4341 4342 /*@C 4343 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4344 (the default parallel PETSc format). For good matrix assembly performance 4345 the user should preallocate the matrix storage by setting the parameters 4346 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4347 performance can be increased by more than a factor of 50. 4348 4349 Collective 4350 4351 Input Parameters: 4352 + comm - MPI communicator 4353 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4354 This value should be the same as the local size used in creating the 4355 y vector for the matrix-vector product y = Ax. 4356 . n - This value should be the same as the local size used in creating the 4357 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4358 calculated if N is given) For square matrices n is almost always m. 4359 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4360 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4361 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4362 (same value is used for all local rows) 4363 . d_nnz - array containing the number of nonzeros in the various rows of the 4364 DIAGONAL portion of the local submatrix (possibly different for each row) 4365 or NULL, if d_nz is used to specify the nonzero structure. 4366 The size of this array is equal to the number of local rows, i.e 'm'. 4367 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4368 submatrix (same value is used for all local rows). 4369 - o_nnz - array containing the number of nonzeros in the various rows of the 4370 OFF-DIAGONAL portion of the local submatrix (possibly different for 4371 each row) or NULL, if o_nz is used to specify the nonzero 4372 structure. The size of this array is equal to the number 4373 of local rows, i.e 'm'. 4374 4375 Output Parameter: 4376 . A - the matrix 4377 4378 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4379 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4380 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4381 4382 Notes: 4383 If the *_nnz parameter is given then the *_nz parameter is ignored 4384 4385 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4386 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4387 storage requirements for this matrix. 4388 4389 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4390 processor than it must be used on all processors that share the object for 4391 that argument. 4392 4393 The user MUST specify either the local or global matrix dimensions 4394 (possibly both). 4395 4396 The parallel matrix is partitioned across processors such that the 4397 first m0 rows belong to process 0, the next m1 rows belong to 4398 process 1, the next m2 rows belong to process 2 etc.. where 4399 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4400 values corresponding to [m x N] submatrix. 4401 4402 The columns are logically partitioned with the n0 columns belonging 4403 to 0th partition, the next n1 columns belonging to the next 4404 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4405 4406 The DIAGONAL portion of the local submatrix on any given processor 4407 is the submatrix corresponding to the rows and columns m,n 4408 corresponding to the given processor. i.e diagonal matrix on 4409 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4410 etc. The remaining portion of the local submatrix [m x (N-n)] 4411 constitute the OFF-DIAGONAL portion. The example below better 4412 illustrates this concept. 4413 4414 For a square global matrix we define each processor's diagonal portion 4415 to be its local rows and the corresponding columns (a square submatrix); 4416 each processor's off-diagonal portion encompasses the remainder of the 4417 local matrix (a rectangular submatrix). 4418 4419 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4420 4421 When calling this routine with a single process communicator, a matrix of 4422 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4423 type of communicator, use the construction mechanism 4424 .vb 4425 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4426 .ve 4427 4428 $ MatCreate(...,&A); 4429 $ MatSetType(A,MATMPIAIJ); 4430 $ MatSetSizes(A, m,n,M,N); 4431 $ MatMPIAIJSetPreallocation(A,...); 4432 4433 By default, this format uses inodes (identical nodes) when possible. 4434 We search for consecutive rows with the same nonzero structure, thereby 4435 reusing matrix information to achieve increased efficiency. 4436 4437 Options Database Keys: 4438 + -mat_no_inode - Do not use inodes 4439 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4440 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4441 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4442 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4443 4444 Example usage: 4445 4446 Consider the following 8x8 matrix with 34 non-zero values, that is 4447 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4448 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4449 as follows 4450 4451 .vb 4452 1 2 0 | 0 3 0 | 0 4 4453 Proc0 0 5 6 | 7 0 0 | 8 0 4454 9 0 10 | 11 0 0 | 12 0 4455 ------------------------------------- 4456 13 0 14 | 15 16 17 | 0 0 4457 Proc1 0 18 0 | 19 20 21 | 0 0 4458 0 0 0 | 22 23 0 | 24 0 4459 ------------------------------------- 4460 Proc2 25 26 27 | 0 0 28 | 29 0 4461 30 0 0 | 31 32 33 | 0 34 4462 .ve 4463 4464 This can be represented as a collection of submatrices as 4465 4466 .vb 4467 A B C 4468 D E F 4469 G H I 4470 .ve 4471 4472 where the submatrices A,B,C are owned by proc0, D,E,F are 4473 owned by proc1, and G,H,I are owned by proc2. 4474 4475 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4476 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4477 The 'M','N' parameters are 8,8, and have the same values on all procs. 4478 4479 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4480 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4481 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4482 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4483 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4484 matrix, and [DF] as another SeqAIJ matrix. 4485 4486 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4487 allocated for every row of the local diagonal submatrix, and o_nz 4488 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4489 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4490 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4491 In this case, the values of d_nz,o_nz are 4492 .vb 4493 proc0 : d_nz = 2, o_nz = 2 4494 proc1 : d_nz = 3, o_nz = 2 4495 proc2 : d_nz = 1, o_nz = 4 4496 .ve 4497 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4498 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4499 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4500 34 values. 4501 4502 When the d_nnz, o_nnz parameters are specified, the storage is specified 4503 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4504 In the above case, the values for d_nnz,o_nnz are 4505 .vb 4506 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4507 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4508 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4509 .ve 4510 Here the space allocated is the sum of all of the above values, i.e., 34, and 4511 hence the preallocation is perfect.
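For the 8x8 example above, a minimal illustrative sketch (assembly and cleanup calls omitted, and the communicator is assumed to be `PETSC_COMM_WORLD`) of the call made on proc1, which owns rows 3-5 and uses the exact per-row counts listed for it, is
.vb
   Mat      A;
   PetscInt d_nnz[3] = {3, 3, 2};
   PetscInt o_nnz[3] = {2, 1, 1};

   MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve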
4512 4513 Level: intermediate 4514 4515 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4516 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4517 @*/ 4518 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4519 { 4520 PetscMPIInt size; 4521 4522 PetscFunctionBegin; 4523 PetscCall(MatCreate(comm, A)); 4524 PetscCall(MatSetSizes(*A, m, n, M, N)); 4525 PetscCallMPI(MPI_Comm_size(comm, &size)); 4526 if (size > 1) { 4527 PetscCall(MatSetType(*A, MATMPIAIJ)); 4528 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4529 } else { 4530 PetscCall(MatSetType(*A, MATSEQAIJ)); 4531 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4532 } 4533 PetscFunctionReturn(PETSC_SUCCESS); 4534 } 4535 4536 /*MC 4537 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4538 4539 Synopsis: 4540 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4541 4542 Not Collective 4543 4544 Input Parameter: 4545 . A - the `MATMPIAIJ` matrix 4546 4547 Output Parameters: 4548 + Ad - the diagonal portion of the matrix 4549 . Ao - the off-diagonal portion of the matrix 4550 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4551 - ierr - error code 4552 4553 Level: advanced 4554 4555 Note: 4556 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4557 4558 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4559 M*/ 4560 4561 /*MC 4562 MatMPIAIJRestoreSeqAIJF90 - Call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4563 4564 Synopsis: 4565 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4566 4567 Not Collective 4568 4569 Input Parameters: 4570 + A - the `MATMPIAIJ` matrix 4571 . Ad - the diagonal portion of the matrix 4572 . Ao - the off-diagonal portion of the matrix 4573 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4574 - ierr - error code 4575 4576 Level: advanced 4577 4578 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4579 M*/ 4580 4581 /*@C 4582 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4583 4584 Not Collective 4585 4586 Input Parameter: 4587 . A - The `MATMPIAIJ` matrix 4588 4589 Output Parameters: 4590 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4591 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4592 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4593 4594 Level: intermediate 4595 4596 Note: 4597 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4598 in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4599 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4600 local column numbers to global column numbers in the original matrix.
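As a minimal sketch (error checking omitted), the global column index of each entry in the first local row of Ao can be recovered through colmap as
.vb
   Mat             Ad, Ao;
   const PetscInt *colmap, *cols;
   PetscInt        ncols, j, gcol;

   MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
   MatGetRow(Ao, 0, &ncols, &cols, NULL);
   for (j = 0; j < ncols; j++) gcol = colmap[cols[j]];
   MatRestoreRow(Ao, 0, &ncols, &cols, NULL);
.ve
where gcol is, for each entry, the column of that entry in the parallel matrix A.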
4601 4602 Fortran Note: 4603 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4604 4605 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4606 @*/ 4607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4608 { 4609 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4610 PetscBool flg; 4611 4612 PetscFunctionBegin; 4613 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4614 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4615 if (Ad) *Ad = a->A; 4616 if (Ao) *Ao = a->B; 4617 if (colmap) *colmap = a->garray; 4618 PetscFunctionReturn(PETSC_SUCCESS); 4619 } 4620 4621 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4622 { 4623 PetscInt m, N, i, rstart, nnz, Ii; 4624 PetscInt *indx; 4625 PetscScalar *values; 4626 MatType rootType; 4627 4628 PetscFunctionBegin; 4629 PetscCall(MatGetSize(inmat, &m, &N)); 4630 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4631 PetscInt *dnz, *onz, sum, bs, cbs; 4632 4633 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4634 /* Check sum(n) = N */ 4635 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4636 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4637 4638 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4639 rstart -= m; 4640 4641 MatPreallocateBegin(comm, m, n, dnz, onz); 4642 for (i = 0; i < m; i++) { 4643 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4644 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4645 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4646 } 4647 4648 PetscCall(MatCreate(comm, outmat)); 4649 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4650 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4651 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4652 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4653 PetscCall(MatSetType(*outmat, rootType)); 4654 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4655 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4656 MatPreallocateEnd(dnz, onz); 4657 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4658 } 4659 4660 /* numeric phase */ 4661 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4662 for (i = 0; i < m; i++) { 4663 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4664 Ii = i + rstart; 4665 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4666 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4667 } 4668 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4669 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4670 PetscFunctionReturn(PETSC_SUCCESS); 4671 } 4672 4673 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4674 { 4675 PetscMPIInt rank; 4676 PetscInt m, N, i, rstart, nnz; 4677 size_t len; 4678 const PetscInt *indx; 4679 PetscViewer out; 4680 char *name; 4681 Mat B; 4682 const PetscScalar *values; 4683 4684 PetscFunctionBegin; 4685 PetscCall(MatGetLocalSize(A, &m, NULL)); 4686 PetscCall(MatGetSize(A, NULL, &N)); 4687 /* Should this be the type of the diagonal block of 
A? */ 4688 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4689 PetscCall(MatSetSizes(B, m, N, m, N)); 4690 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4691 PetscCall(MatSetType(B, MATSEQAIJ)); 4692 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4693 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4694 for (i = 0; i < m; i++) { 4695 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4696 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4697 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4698 } 4699 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4700 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4701 4702 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4703 PetscCall(PetscStrlen(outfile, &len)); 4704 PetscCall(PetscMalloc1(len + 6, &name)); 4705 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4706 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4707 PetscCall(PetscFree(name)); 4708 PetscCall(MatView(B, out)); 4709 PetscCall(PetscViewerDestroy(&out)); 4710 PetscCall(MatDestroy(&B)); 4711 PetscFunctionReturn(PETSC_SUCCESS); 4712 } 4713 4714 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4715 { 4716 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4717 4718 PetscFunctionBegin; 4719 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4720 PetscCall(PetscFree(merge->id_r)); 4721 PetscCall(PetscFree(merge->len_s)); 4722 PetscCall(PetscFree(merge->len_r)); 4723 PetscCall(PetscFree(merge->bi)); 4724 PetscCall(PetscFree(merge->bj)); 4725 PetscCall(PetscFree(merge->buf_ri[0])); 4726 PetscCall(PetscFree(merge->buf_ri)); 4727 PetscCall(PetscFree(merge->buf_rj[0])); 4728 PetscCall(PetscFree(merge->buf_rj)); 4729 PetscCall(PetscFree(merge->coi)); 4730 PetscCall(PetscFree(merge->coj)); 4731 PetscCall(PetscFree(merge->owners_co)); 4732 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4733 PetscCall(PetscFree(merge)); 4734 PetscFunctionReturn(PETSC_SUCCESS); 4735 } 4736 4737 #include <../src/mat/utils/freespace.h> 4738 #include <petscbt.h> 4739 4740 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4741 { 4742 MPI_Comm comm; 4743 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4744 PetscMPIInt size, rank, taga, *len_s; 4745 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4746 PetscInt proc, m; 4747 PetscInt **buf_ri, **buf_rj; 4748 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4749 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4750 MPI_Request *s_waits, *r_waits; 4751 MPI_Status *status; 4752 const MatScalar *aa, *a_a; 4753 MatScalar **abuf_r, *ba_i; 4754 Mat_Merge_SeqsToMPI *merge; 4755 PetscContainer container; 4756 4757 PetscFunctionBegin; 4758 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4759 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4760 4761 PetscCallMPI(MPI_Comm_size(comm, &size)); 4762 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4763 4764 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4765 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4766 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4767 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4768 aa = a_a; 4769 4770 bi = merge->bi; 4771 bj = merge->bj; 4772 buf_ri = merge->buf_ri; 4773 buf_rj = merge->buf_rj; 4774 4775 PetscCall(PetscMalloc1(size, &status)); 4776 owners = 
merge->rowmap->range; 4777 len_s = merge->len_s; 4778 4779 /* send and recv matrix values */ 4780 /*-----------------------------*/ 4781 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4782 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4783 4784 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4785 for (proc = 0, k = 0; proc < size; proc++) { 4786 if (!len_s[proc]) continue; 4787 i = owners[proc]; 4788 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4789 k++; 4790 } 4791 4792 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4793 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4794 PetscCall(PetscFree(status)); 4795 4796 PetscCall(PetscFree(s_waits)); 4797 PetscCall(PetscFree(r_waits)); 4798 4799 /* insert mat values of mpimat */ 4800 /*----------------------------*/ 4801 PetscCall(PetscMalloc1(N, &ba_i)); 4802 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4803 4804 for (k = 0; k < merge->nrecv; k++) { 4805 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4806 nrows = *(buf_ri_k[k]); 4807 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4808 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4809 } 4810 4811 /* set values of ba */ 4812 m = merge->rowmap->n; 4813 for (i = 0; i < m; i++) { 4814 arow = owners[rank] + i; 4815 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4816 bnzi = bi[i + 1] - bi[i]; 4817 PetscCall(PetscArrayzero(ba_i, bnzi)); 4818 4819 /* add local non-zero vals of this proc's seqmat into ba */ 4820 anzi = ai[arow + 1] - ai[arow]; 4821 aj = a->j + ai[arow]; 4822 aa = a_a + ai[arow]; 4823 nextaj = 0; 4824 for (j = 0; nextaj < anzi; j++) { 4825 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4826 ba_i[j] += aa[nextaj++]; 4827 } 4828 } 4829 4830 /* add received vals into ba */ 4831 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4832 /* i-th row */ 4833 if (i == *nextrow[k]) { 4834 anzi = *(nextai[k] + 1) - *nextai[k]; 4835 aj = buf_rj[k] + *(nextai[k]); 4836 aa = abuf_r[k] + *(nextai[k]); 4837 nextaj = 0; 4838 for (j = 0; nextaj < anzi; j++) { 4839 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4840 ba_i[j] += aa[nextaj++]; 4841 } 4842 } 4843 nextrow[k]++; 4844 nextai[k]++; 4845 } 4846 } 4847 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4848 } 4849 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4850 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4851 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4852 4853 PetscCall(PetscFree(abuf_r[0])); 4854 PetscCall(PetscFree(abuf_r)); 4855 PetscCall(PetscFree(ba_i)); 4856 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4857 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4858 PetscFunctionReturn(PETSC_SUCCESS); 4859 } 4860 4861 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4862 { 4863 Mat B_mpi; 4864 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4865 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4866 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4867 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4868 PetscInt len, proc, *dnz, *onz, bs, cbs; 4869 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, 
arow, bnzi; 4870 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4871 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4872 MPI_Status *status; 4873 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4874 PetscBT lnkbt; 4875 Mat_Merge_SeqsToMPI *merge; 4876 PetscContainer container; 4877 4878 PetscFunctionBegin; 4879 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4880 4881 /* make sure it is a PETSc comm */ 4882 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4883 PetscCallMPI(MPI_Comm_size(comm, &size)); 4884 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4885 4886 PetscCall(PetscNew(&merge)); 4887 PetscCall(PetscMalloc1(size, &status)); 4888 4889 /* determine row ownership */ 4890 /*---------------------------------------------------------*/ 4891 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4892 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4893 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4894 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4895 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4896 PetscCall(PetscMalloc1(size, &len_si)); 4897 PetscCall(PetscMalloc1(size, &merge->len_s)); 4898 4899 m = merge->rowmap->n; 4900 owners = merge->rowmap->range; 4901 4902 /* determine the number of messages to send, their lengths */ 4903 /*---------------------------------------------------------*/ 4904 len_s = merge->len_s; 4905 4906 len = 0; /* length of buf_si[] */ 4907 merge->nsend = 0; 4908 for (proc = 0; proc < size; proc++) { 4909 len_si[proc] = 0; 4910 if (proc == rank) { 4911 len_s[proc] = 0; 4912 } else { 4913 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4914 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4915 } 4916 if (len_s[proc]) { 4917 merge->nsend++; 4918 nrows = 0; 4919 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4920 if (ai[i + 1] > ai[i]) nrows++; 4921 } 4922 len_si[proc] = 2 * (nrows + 1); 4923 len += len_si[proc]; 4924 } 4925 } 4926 4927 /* determine the number and length of messages to receive for ij-structure */ 4928 /*-------------------------------------------------------------------------*/ 4929 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4930 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4931 4932 /* post the Irecv of j-structure */ 4933 /*-------------------------------*/ 4934 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4935 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4936 4937 /* post the Isend of j-structure */ 4938 /*--------------------------------*/ 4939 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4940 4941 for (proc = 0, k = 0; proc < size; proc++) { 4942 if (!len_s[proc]) continue; 4943 i = owners[proc]; 4944 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4945 k++; 4946 } 4947 4948 /* receives and sends of j-structure are complete */ 4949 /*------------------------------------------------*/ 4950 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4951 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4952 4953 /* send and recv i-structure */ 4954 /*---------------------------*/ 4955 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4956 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4957 4958 
PetscCall(PetscMalloc1(len + 1, &buf_s)); 4959 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4960 for (proc = 0, k = 0; proc < size; proc++) { 4961 if (!len_s[proc]) continue; 4962 /* form outgoing message for i-structure: 4963 buf_si[0]: nrows to be sent 4964 [1:nrows]: row index (global) 4965 [nrows+1:2*nrows+1]: i-structure index 4966 */ 4967 /*-------------------------------------------*/ 4968 nrows = len_si[proc] / 2 - 1; 4969 buf_si_i = buf_si + nrows + 1; 4970 buf_si[0] = nrows; 4971 buf_si_i[0] = 0; 4972 nrows = 0; 4973 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4974 anzi = ai[i + 1] - ai[i]; 4975 if (anzi) { 4976 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4977 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4978 nrows++; 4979 } 4980 } 4981 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4982 k++; 4983 buf_si += len_si[proc]; 4984 } 4985 4986 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4987 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4988 4989 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4990 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4991 4992 PetscCall(PetscFree(len_si)); 4993 PetscCall(PetscFree(len_ri)); 4994 PetscCall(PetscFree(rj_waits)); 4995 PetscCall(PetscFree2(si_waits, sj_waits)); 4996 PetscCall(PetscFree(ri_waits)); 4997 PetscCall(PetscFree(buf_s)); 4998 PetscCall(PetscFree(status)); 4999 5000 /* compute a local seq matrix in each processor */ 5001 /*----------------------------------------------*/ 5002 /* allocate bi array and free space for accumulating nonzero column info */ 5003 PetscCall(PetscMalloc1(m + 1, &bi)); 5004 bi[0] = 0; 5005 5006 /* create and initialize a linked list */ 5007 nlnk = N + 1; 5008 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5009 5010 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5011 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5012 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5013 5014 current_space = free_space; 5015 5016 /* determine symbolic info for each local row */ 5017 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5018 5019 for (k = 0; k < merge->nrecv; k++) { 5020 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5021 nrows = *buf_ri_k[k]; 5022 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5023 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5024 } 5025 5026 MatPreallocateBegin(comm, m, n, dnz, onz); 5027 len = 0; 5028 for (i = 0; i < m; i++) { 5029 bnzi = 0; 5030 /* add local non-zero cols of this proc's seqmat into lnk */ 5031 arow = owners[rank] + i; 5032 anzi = ai[arow + 1] - ai[arow]; 5033 aj = a->j + ai[arow]; 5034 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5035 bnzi += nlnk; 5036 /* add received col data into lnk */ 5037 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5038 if (i == *nextrow[k]) { /* i-th row */ 5039 anzi = *(nextai[k] + 1) - *nextai[k]; 5040 aj = buf_rj[k] + *nextai[k]; 5041 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5042 bnzi += nlnk; 5043 nextrow[k]++; 5044 nextai[k]++; 5045 } 5046 } 5047 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 
5048 5049 /* if free space is not available, make more free space */ 5050 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5051 /* copy data into free space, then initialize lnk */ 5052 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5053 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5054 5055 current_space->array += bnzi; 5056 current_space->local_used += bnzi; 5057 current_space->local_remaining -= bnzi; 5058 5059 bi[i + 1] = bi[i] + bnzi; 5060 } 5061 5062 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5063 5064 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5065 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5066 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5067 5068 /* create symbolic parallel matrix B_mpi */ 5069 /*---------------------------------------*/ 5070 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5071 PetscCall(MatCreate(comm, &B_mpi)); 5072 if (n == PETSC_DECIDE) { 5073 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5074 } else { 5075 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5076 } 5077 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5078 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5079 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5080 MatPreallocateEnd(dnz, onz); 5081 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5082 5083 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5084 B_mpi->assembled = PETSC_FALSE; 5085 merge->bi = bi; 5086 merge->bj = bj; 5087 merge->buf_ri = buf_ri; 5088 merge->buf_rj = buf_rj; 5089 merge->coi = NULL; 5090 merge->coj = NULL; 5091 merge->owners_co = NULL; 5092 5093 PetscCall(PetscCommDestroy(&comm)); 5094 5095 /* attach the supporting struct to B_mpi for reuse */ 5096 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5097 PetscCall(PetscContainerSetPointer(container, merge)); 5098 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5099 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5100 PetscCall(PetscContainerDestroy(&container)); 5101 *mpimat = B_mpi; 5102 5103 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5104 PetscFunctionReturn(PETSC_SUCCESS); 5105 } 5106 5107 /*@C 5108 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5109 matrices from each processor 5110 5111 Collective 5112 5113 Input Parameters: 5114 + comm - the communicator the parallel matrix will live on 5115 . seqmat - the input sequential matrix 5116 . m - number of local rows (or `PETSC_DECIDE`) 5117 . n - number of local columns (or `PETSC_DECIDE`) 5118 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5119 5120 Output Parameter: 5121 . mpimat - the parallel matrix generated 5122 5123 Level: advanced 5124 5125 Note: 5126 The dimensions of the sequential matrix in each processor MUST be the same. 5127 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 5128 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat.
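A minimal usage sketch (error checking omitted; comm and seqmat are assumed to already exist, with seqmat having the same dimensions on every rank) is
.vb
   Mat C;

   MatCreateMPIAIJSumSeqAIJ(comm, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_INITIAL_MATRIX, &C);
   MatCreateMPIAIJSumSeqAIJ(comm, seqmat, PETSC_DECIDE, PETSC_DECIDE, MAT_REUSE_MATRIX, &C);
.ve
The second call reuses the symbolic information attached to C and only refreshes its numerical values, so it is appropriate after the values (but not the nonzero structure) of seqmat have changed.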
5129 @*/ 5130 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5131 { 5132 PetscMPIInt size; 5133 5134 PetscFunctionBegin; 5135 PetscCallMPI(MPI_Comm_size(comm, &size)); 5136 if (size == 1) { 5137 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5138 if (scall == MAT_INITIAL_MATRIX) { 5139 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5140 } else { 5141 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5142 } 5143 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5144 PetscFunctionReturn(PETSC_SUCCESS); 5145 } 5146 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5147 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5148 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5149 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5150 PetscFunctionReturn(PETSC_SUCCESS); 5151 } 5152 5153 /*@ 5154 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5155 mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5156 with `MatGetSize()` 5157 5158 Not Collective 5159 5160 Input Parameter: 5161 . A - the matrix 5162 5163 5164 Output Parameter: 5165 . A_loc - the local sequential matrix generated 5166 5167 Level: developer 5168 5169 Notes: 5170 In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5171 5172 Destroy the matrix with `MatDestroy()` 5173 5174 .seealso: `MatMPIAIJGetLocalMat()` 5175 @*/ 5176 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5177 { 5178 PetscBool mpi; 5179 5180 PetscFunctionBegin; 5181 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5182 if (mpi) { 5183 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5184 } else { 5185 *A_loc = A; 5186 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5187 } 5188 PetscFunctionReturn(PETSC_SUCCESS); 5189 } 5190 5191 /*@ 5192 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5193 mlocal rows and n columns, where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5194 with `MatGetSize()` 5195 5196 Not Collective 5197 5198 Input Parameters: 5199 + A - the matrix 5200 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5201 5202 Output Parameter: 5203 . A_loc - the local sequential matrix generated 5204 5205 Level: developer 5206 5207 Notes: 5208 In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5209 5210 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5211 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5212 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5213 modify the values of the returned A_loc.
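A minimal usage sketch (error checking omitted) is
.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
   MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
   MatDestroy(&A_loc);
.ve
The second call refills the already created A_loc with the current values of A without recomputing its nonzero structure.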
5214 5215 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5216 @*/ 5217 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5218 { 5219 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5220 Mat_SeqAIJ *mat, *a, *b; 5221 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5222 const PetscScalar *aa, *ba, *aav, *bav; 5223 PetscScalar *ca, *cam; 5224 PetscMPIInt size; 5225 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5226 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5227 PetscBool match; 5228 5229 PetscFunctionBegin; 5230 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5231 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5232 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5233 if (size == 1) { 5234 if (scall == MAT_INITIAL_MATRIX) { 5235 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5236 *A_loc = mpimat->A; 5237 } else if (scall == MAT_REUSE_MATRIX) { 5238 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5239 } 5240 PetscFunctionReturn(PETSC_SUCCESS); 5241 } 5242 5243 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5244 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5245 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5246 ai = a->i; 5247 aj = a->j; 5248 bi = b->i; 5249 bj = b->j; 5250 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5251 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5252 aa = aav; 5253 ba = bav; 5254 if (scall == MAT_INITIAL_MATRIX) { 5255 PetscCall(PetscMalloc1(1 + am, &ci)); 5256 ci[0] = 0; 5257 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5258 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5259 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5260 k = 0; 5261 for (i = 0; i < am; i++) { 5262 ncols_o = bi[i + 1] - bi[i]; 5263 ncols_d = ai[i + 1] - ai[i]; 5264 /* off-diagonal portion of A */ 5265 for (jo = 0; jo < ncols_o; jo++) { 5266 col = cmap[*bj]; 5267 if (col >= cstart) break; 5268 cj[k] = col; 5269 bj++; 5270 ca[k++] = *ba++; 5271 } 5272 /* diagonal portion of A */ 5273 for (j = 0; j < ncols_d; j++) { 5274 cj[k] = cstart + *aj++; 5275 ca[k++] = *aa++; 5276 } 5277 /* off-diagonal portion of A */ 5278 for (j = jo; j < ncols_o; j++) { 5279 cj[k] = cmap[*bj++]; 5280 ca[k++] = *ba++; 5281 } 5282 } 5283 /* put together the new matrix */ 5284 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5285 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5286 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5287 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5288 mat->free_a = PETSC_TRUE; 5289 mat->free_ij = PETSC_TRUE; 5290 mat->nonew = 0; 5291 } else if (scall == MAT_REUSE_MATRIX) { 5292 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5293 ci = mat->i; 5294 cj = mat->j; 5295 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5296 for (i = 0; i < am; i++) { 5297 /* off-diagonal portion of A */ 5298 ncols_o = bi[i + 1] - bi[i]; 5299 for (jo = 0; jo < ncols_o; jo++) { 5300 col = cmap[*bj]; 5301 if (col >= cstart) break; 5302 *cam++ = *ba++; 5303 bj++; 5304 } 5305 /* diagonal portion of A */ 5306 ncols_d = ai[i + 1] - ai[i]; 5307 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5308 /* off-diagonal portion of A */ 5309 for (j = jo; j < ncols_o; j++) { 5310 *cam++ = *ba++; 5311 bj++; 5312 } 5313 } 5314 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5315 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5316 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5317 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5318 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5319 PetscFunctionReturn(PETSC_SUCCESS); 5320 } 5321 5322 /*@ 5323 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5324 mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts 5325 5326 Not Collective 5327 5328 Input Parameters: 5329 + A - the matrix 5330 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5331 5332 Output Parameters: 5333 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5334 - A_loc - the local sequential matrix generated 5335 5336 Level: developer 5337 5338 Note: 5339 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returned matrix are those associated with the diagonal part, then those associated with the off-diagonal part (in its local ordering) 5340 5341 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5342 @*/ 5343 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5344 { 5345 Mat Ao, Ad; 5346 const PetscInt *cmap; 5347 PetscMPIInt size; 5348 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5349 5350 PetscFunctionBegin; 5351 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5352 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5353 if (size == 1) { 5354 if (scall == MAT_INITIAL_MATRIX) { 5355 PetscCall(PetscObjectReference((PetscObject)Ad)); 5356 *A_loc = Ad; 5357 } else if (scall == MAT_REUSE_MATRIX) { 5358 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5359 } 5360 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5361 PetscFunctionReturn(PETSC_SUCCESS); 5362 } 5363 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5364 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5365 if (f) { 5366 PetscCall((*f)(A, scall, glob, A_loc)); 5367 } else { 5368 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5369 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5370 Mat_SeqAIJ *c; 5371 PetscInt *ai = a->i, *aj = a->j; 5372 PetscInt *bi = b->i, *bj = b->j; 5373 PetscInt *ci, *cj; 5374 const PetscScalar *aa, *ba; 5375 PetscScalar *ca; 5376 PetscInt i, j, am, dn, on; 5377 5378 PetscCall(MatGetLocalSize(Ad, &am,
&dn)); 5379 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5380 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5381 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5382 if (scall == MAT_INITIAL_MATRIX) { 5383 PetscInt k; 5384 PetscCall(PetscMalloc1(1 + am, &ci)); 5385 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5386 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5387 ci[0] = 0; 5388 for (i = 0, k = 0; i < am; i++) { 5389 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5390 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5391 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5392 /* diagonal portion of A */ 5393 for (j = 0; j < ncols_d; j++, k++) { 5394 cj[k] = *aj++; 5395 ca[k] = *aa++; 5396 } 5397 /* off-diagonal portion of A */ 5398 for (j = 0; j < ncols_o; j++, k++) { 5399 cj[k] = dn + *bj++; 5400 ca[k] = *ba++; 5401 } 5402 } 5403 /* put together the new matrix */ 5404 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5405 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5406 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5407 c = (Mat_SeqAIJ *)(*A_loc)->data; 5408 c->free_a = PETSC_TRUE; 5409 c->free_ij = PETSC_TRUE; 5410 c->nonew = 0; 5411 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5412 } else if (scall == MAT_REUSE_MATRIX) { 5413 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5414 for (i = 0; i < am; i++) { 5415 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5416 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5417 /* diagonal portion of A */ 5418 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5419 /* off-diagonal portion of A */ 5420 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5421 } 5422 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5423 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5424 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5425 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &ba)); 5426 if (glob) { 5427 PetscInt cst, *gidx; 5428 5429 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5430 PetscCall(PetscMalloc1(dn + on, &gidx)); 5431 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5432 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5433 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5434 } 5435 } 5436 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5437 PetscFunctionReturn(PETSC_SUCCESS); 5438 } 5439 5440 /*@C 5441 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from a `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5442 5443 Not Collective 5444 5445 Input Parameters: 5446 + A - the matrix 5447 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5448 - row, col - index sets of rows and columns to extract (or NULL) 5449 5450 Output Parameter: 5451 .
A_loc - the local sequential matrix generated 5452 5453 Level: developer 5454 5455 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5456 @*/ 5457 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5458 { 5459 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5460 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5461 IS isrowa, iscola; 5462 Mat *aloc; 5463 PetscBool match; 5464 5465 PetscFunctionBegin; 5466 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5467 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5468 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5469 if (!row) { 5470 start = A->rmap->rstart; 5471 end = A->rmap->rend; 5472 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5473 } else { 5474 isrowa = *row; 5475 } 5476 if (!col) { 5477 start = A->cmap->rstart; 5478 cmap = a->garray; 5479 nzA = a->A->cmap->n; 5480 nzB = a->B->cmap->n; 5481 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5482 ncols = 0; 5483 for (i = 0; i < nzB; i++) { 5484 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5485 else break; 5486 } 5487 imark = i; 5488 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5489 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5490 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5491 } else { 5492 iscola = *col; 5493 } 5494 if (scall != MAT_INITIAL_MATRIX) { 5495 PetscCall(PetscMalloc1(1, &aloc)); 5496 aloc[0] = *A_loc; 5497 } 5498 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5499 if (!col) { /* attach global id of condensed columns */ 5500 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5501 } 5502 *A_loc = aloc[0]; 5503 PetscCall(PetscFree(aloc)); 5504 if (!row) PetscCall(ISDestroy(&isrowa)); 5505 if (!col) PetscCall(ISDestroy(&iscola)); 5506 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5507 PetscFunctionReturn(PETSC_SUCCESS); 5508 } 5509 5510 /* 5511 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5512 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5513 * on a global size. 
5514 * */ 5515 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5516 { 5517 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5518 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5519 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5520 PetscMPIInt owner; 5521 PetscSFNode *iremote, *oiremote; 5522 const PetscInt *lrowindices; 5523 PetscSF sf, osf; 5524 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5525 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5526 MPI_Comm comm; 5527 ISLocalToGlobalMapping mapping; 5528 const PetscScalar *pd_a, *po_a; 5529 5530 PetscFunctionBegin; 5531 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5532 /* plocalsize is the number of roots 5533 * nrows is the number of leaves 5534 * */ 5535 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5536 PetscCall(ISGetLocalSize(rows, &nrows)); 5537 PetscCall(PetscCalloc1(nrows, &iremote)); 5538 PetscCall(ISGetIndices(rows, &lrowindices)); 5539 for (i = 0; i < nrows; i++) { 5540 /* Find a remote index and an owner for a row 5541 * The row could be local or remote 5542 * */ 5543 owner = 0; 5544 lidx = 0; 5545 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5546 iremote[i].index = lidx; 5547 iremote[i].rank = owner; 5548 } 5549 /* Create SF to communicate how many nonzero columns for each row */ 5550 PetscCall(PetscSFCreate(comm, &sf)); 5551 /* SF will figure out the number of nonzero colunms for each row, and their 5552 * offsets 5553 * */ 5554 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5555 PetscCall(PetscSFSetFromOptions(sf)); 5556 PetscCall(PetscSFSetUp(sf)); 5557 5558 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5559 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5560 PetscCall(PetscCalloc1(nrows, &pnnz)); 5561 roffsets[0] = 0; 5562 roffsets[1] = 0; 5563 for (i = 0; i < plocalsize; i++) { 5564 /* diag */ 5565 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5566 /* off diag */ 5567 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5568 /* compute offsets so that we relative location for each row */ 5569 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5570 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5571 } 5572 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5573 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5574 /* 'r' means root, and 'l' means leaf */ 5575 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5576 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5577 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5578 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5579 PetscCall(PetscSFDestroy(&sf)); 5580 PetscCall(PetscFree(roffsets)); 5581 PetscCall(PetscFree(nrcols)); 5582 dntotalcols = 0; 5583 ontotalcols = 0; 5584 ncol = 0; 5585 for (i = 0; i < nrows; i++) { 5586 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5587 ncol = PetscMax(pnnz[i], ncol); 5588 /* diag */ 5589 dntotalcols += nlcols[i * 2 + 0]; 5590 /* off diag */ 5591 ontotalcols += nlcols[i * 2 + 1]; 5592 } 5593 /* We do not need to figure the right number of columns 5594 * since all the calculations will be done by going through the raw data 5595 * */ 5596 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5597 PetscCall(MatSetUp(*P_oth)); 5598 PetscCall(PetscFree(pnnz)); 5599 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5600 /* diag */ 5601 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5602 /* off diag */ 5603 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5604 /* diag */ 5605 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5606 /* off diag */ 5607 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5608 dntotalcols = 0; 5609 ontotalcols = 0; 5610 ntotalcols = 0; 5611 for (i = 0; i < nrows; i++) { 5612 owner = 0; 5613 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5614 /* Set iremote for diag matrix */ 5615 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5616 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5617 iremote[dntotalcols].rank = owner; 5618 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5619 ilocal[dntotalcols++] = ntotalcols++; 5620 } 5621 /* off diag */ 5622 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5623 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5624 oiremote[ontotalcols].rank = owner; 5625 oilocal[ontotalcols++] = ntotalcols++; 5626 } 5627 } 5628 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5629 PetscCall(PetscFree(loffsets)); 5630 PetscCall(PetscFree(nlcols)); 5631 PetscCall(PetscSFCreate(comm, &sf)); 5632 /* P serves as roots and P_oth is leaves 5633 * Diag matrix 5634 * */ 5635 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5636 PetscCall(PetscSFSetFromOptions(sf)); 5637 PetscCall(PetscSFSetUp(sf)); 5638 5639 PetscCall(PetscSFCreate(comm, &osf)); 5640 /* Off diag */ 5641 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5642 PetscCall(PetscSFSetFromOptions(osf)); 5643 PetscCall(PetscSFSetUp(osf)); 5644 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5645 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5646 /* We operate on the matrix internal data for saving memory */ 5647 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5648 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5649 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5650 /* Convert to global indices for diag matrix */ 5651 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5652 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5653 /* We want P_oth store global indices */ 5654 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5655 /* Use memory scalable approach */ 5656 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5657 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5658 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5659 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5660 /* Convert back to local indices */ 5661 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5662 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5663 nout = 0; 5664 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5665 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5666 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5667 /* Exchange values */ 5668 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5669 
PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5670 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5671 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5672 /* Stop PETSc from shrinking memory */ 5673 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5674 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5675 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5676 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5677 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5678 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5679 PetscCall(PetscSFDestroy(&sf)); 5680 PetscCall(PetscSFDestroy(&osf)); 5681 PetscFunctionReturn(PETSC_SUCCESS); 5682 } 5683 5684 /* 5685 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5686 * This supports MPIAIJ and MAIJ 5687 * */ 5688 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5689 { 5690 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5691 Mat_SeqAIJ *p_oth; 5692 IS rows, map; 5693 PetscHMapI hamp; 5694 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5695 MPI_Comm comm; 5696 PetscSF sf, osf; 5697 PetscBool has; 5698 5699 PetscFunctionBegin; 5700 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5701 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5702 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5703 * and then create a submatrix (that often is an overlapping matrix) 5704 * */ 5705 if (reuse == MAT_INITIAL_MATRIX) { 5706 /* Use a hash table to figure out unique keys */ 5707 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5708 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5709 count = 0; 5710 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5711 for (i = 0; i < a->B->cmap->n; i++) { 5712 key = a->garray[i] / dof; 5713 PetscCall(PetscHMapIHas(hamp, key, &has)); 5714 if (!has) { 5715 mapping[i] = count; 5716 PetscCall(PetscHMapISet(hamp, key, count++)); 5717 } else { 5718 /* Current 'i' has the same value the previous step */ 5719 mapping[i] = count - 1; 5720 } 5721 } 5722 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5723 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5724 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5725 PetscCall(PetscCalloc1(htsize, &rowindices)); 5726 off = 0; 5727 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5728 PetscCall(PetscHMapIDestroy(&hamp)); 5729 PetscCall(PetscSortInt(htsize, rowindices)); 5730 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5731 /* In case, the matrix was already created but users want to recreate the matrix */ 5732 PetscCall(MatDestroy(P_oth)); 5733 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5734 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5735 PetscCall(ISDestroy(&map)); 5736 PetscCall(ISDestroy(&rows)); 5737 } else if (reuse == MAT_REUSE_MATRIX) { 5738 /* If matrix was already created, we simply update values using SF objects 5739 * that as attached to the matrix earlier. 
5740 */ 5741 const PetscScalar *pd_a, *po_a; 5742 5743 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5744 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5745 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5746 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5747 /* Update values in place */ 5748 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5749 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5750 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5751 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5752 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5753 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5754 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5755 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5756 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5757 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5758 PetscFunctionReturn(PETSC_SUCCESS); 5759 } 5760 5761 /*@C 5762 MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A 5763 5764 Collective 5765 5766 Input Parameters: 5767 + A - the first matrix in `MATMPIAIJ` format 5768 . B - the second matrix in `MATMPIAIJ` format 5769 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5770 5771 Output Parameters: 5772 + rowb - On input index sets of rows of B to extract (or NULL), modified on output 5773 . colb - On input index sets of columns of B to extract (or NULL), modified on output 5774 - B_seq - the sequential matrix generated 5775 5776 Level: developer 5777 5778 @*/ 5779 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5780 { 5781 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5782 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5783 IS isrowb, iscolb; 5784 Mat *bseq = NULL; 5785 5786 PetscFunctionBegin; 5787 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5788 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5789 } 5790 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5791 5792 if (scall == MAT_INITIAL_MATRIX) { 5793 start = A->cmap->rstart; 5794 cmap = a->garray; 5795 nzA = a->A->cmap->n; 5796 nzB = a->B->cmap->n; 5797 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5798 ncols = 0; 5799 for (i = 0; i < nzB; i++) { /* row < local row index */ 5800 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5801 else break; 5802 } 5803 imark = i; 5804 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5805 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5806 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5807 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5808 } else { 5809 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5810 isrowb = *rowb; 5811 iscolb = *colb; 5812 PetscCall(PetscMalloc1(1, &bseq)); 5813 bseq[0] = *B_seq; 5814 } 5815 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5816 *B_seq = bseq[0]; 5817 PetscCall(PetscFree(bseq)); 5818 if (!rowb) { 5819 
PetscCall(ISDestroy(&isrowb)); 5820 } else { 5821 *rowb = isrowb; 5822 } 5823 if (!colb) { 5824 PetscCall(ISDestroy(&iscolb)); 5825 } else { 5826 *colb = iscolb; 5827 } 5828 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5829 PetscFunctionReturn(PETSC_SUCCESS); 5830 } 5831 5832 /* 5833 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5834 of the OFF-DIAGONAL portion of local A 5835 5836 Collective 5837 5838 Input Parameters: 5839 + A,B - the matrices in mpiaij format 5840 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5841 5842 Output Parameter: 5843 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5844 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5845 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5846 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5847 5848 Developer Note: 5849 This directly accesses information inside the VecScatter associated with the matrix-vector product 5850 for this matrix. This is not desirable.. 5851 5852 Level: developer 5853 5854 */ 5855 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5856 { 5857 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5858 Mat_SeqAIJ *b_oth; 5859 VecScatter ctx; 5860 MPI_Comm comm; 5861 const PetscMPIInt *rprocs, *sprocs; 5862 const PetscInt *srow, *rstarts, *sstarts; 5863 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5864 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5865 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5866 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5867 PetscMPIInt size, tag, rank, nreqs; 5868 5869 PetscFunctionBegin; 5870 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5871 PetscCallMPI(MPI_Comm_size(comm, &size)); 5872 5873 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5874 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5875 } 5876 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5877 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5878 5879 if (size == 1) { 5880 startsj_s = NULL; 5881 bufa_ptr = NULL; 5882 *B_oth = NULL; 5883 PetscFunctionReturn(PETSC_SUCCESS); 5884 } 5885 5886 ctx = a->Mvctx; 5887 tag = ((PetscObject)ctx)->tag; 5888 5889 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5890 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5891 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5892 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5893 PetscCall(PetscMalloc1(nreqs, &reqs)); 5894 rwaits = reqs; 5895 swaits = reqs + nrecvs; 5896 5897 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5898 if (scall == MAT_INITIAL_MATRIX) { 5899 /* i-array */ 5900 /*---------*/ 5901 /* post receives */ 5902 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), 
&rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5903 for (i = 0; i < nrecvs; i++) { 5904 rowlen = rvalues + rstarts[i] * rbs; 5905 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5906 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5907 } 5908 5909 /* pack the outgoing message */ 5910 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5911 5912 sstartsj[0] = 0; 5913 rstartsj[0] = 0; 5914 len = 0; /* total length of j or a array to be sent */ 5915 if (nsends) { 5916 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5917 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5918 } 5919 for (i = 0; i < nsends; i++) { 5920 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5921 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5922 for (j = 0; j < nrows; j++) { 5923 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5924 for (l = 0; l < sbs; l++) { 5925 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5926 5927 rowlen[j * sbs + l] = ncols; 5928 5929 len += ncols; 5930 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5931 } 5932 k++; 5933 } 5934 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5935 5936 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5937 } 5938 /* recvs and sends of i-array are completed */ 5939 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5940 PetscCall(PetscFree(svalues)); 5941 5942 /* allocate buffers for sending j and a arrays */ 5943 PetscCall(PetscMalloc1(len + 1, &bufj)); 5944 PetscCall(PetscMalloc1(len + 1, &bufa)); 5945 5946 /* create i-array of B_oth */ 5947 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5948 5949 b_othi[0] = 0; 5950 len = 0; /* total length of j or a array to be received */ 5951 k = 0; 5952 for (i = 0; i < nrecvs; i++) { 5953 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5954 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5955 for (j = 0; j < nrows; j++) { 5956 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5957 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5958 k++; 5959 } 5960 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5961 } 5962 PetscCall(PetscFree(rvalues)); 5963 5964 /* allocate space for j and a arrays of B_oth */ 5965 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5966 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5967 5968 /* j-array */ 5969 /*---------*/ 5970 /* post receives of j-array */ 5971 for (i = 0; i < nrecvs; i++) { 5972 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5973 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5974 } 5975 5976 /* pack the outgoing message j-array */ 5977 if (nsends) k = sstarts[0]; 5978 for (i = 0; i < nsends; i++) { 5979 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5980 bufJ = bufj + sstartsj[i]; 5981 for (j = 0; j < nrows; j++) { 5982 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5983 for (ll = 0; ll < sbs; ll++) { 5984 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5985 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5986 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5987 } 5988 } 5989 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, 
sprocs[i], tag, comm, swaits + i)); 5990 } 5991 5992 /* recvs and sends of j-array are completed */ 5993 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5994 } else if (scall == MAT_REUSE_MATRIX) { 5995 sstartsj = *startsj_s; 5996 rstartsj = *startsj_r; 5997 bufa = *bufa_ptr; 5998 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5999 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 6000 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 6001 6002 /* a-array */ 6003 /*---------*/ 6004 /* post receives of a-array */ 6005 for (i = 0; i < nrecvs; i++) { 6006 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6007 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6008 } 6009 6010 /* pack the outgoing message a-array */ 6011 if (nsends) k = sstarts[0]; 6012 for (i = 0; i < nsends; i++) { 6013 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6014 bufA = bufa + sstartsj[i]; 6015 for (j = 0; j < nrows; j++) { 6016 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6017 for (ll = 0; ll < sbs; ll++) { 6018 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6019 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6020 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6021 } 6022 } 6023 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6024 } 6025 /* recvs and sends of a-array are completed */ 6026 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6027 PetscCall(PetscFree(reqs)); 6028 6029 if (scall == MAT_INITIAL_MATRIX) { 6030 /* put together the new matrix */ 6031 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6032 6033 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6034 /* Since these are PETSc arrays, change flags to free them as necessary. 
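   (free_a and free_ij are Mat_SeqAIJ's ownership flags; setting them to PETSC_TRUE below lets MatDestroy() release b_othi, b_othj and b_otha, which this routine allocated with PetscMalloc1().)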
*/ 6035 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6036 b_oth->free_a = PETSC_TRUE; 6037 b_oth->free_ij = PETSC_TRUE; 6038 b_oth->nonew = 0; 6039 6040 PetscCall(PetscFree(bufj)); 6041 if (!startsj_s || !bufa_ptr) { 6042 PetscCall(PetscFree2(sstartsj, rstartsj)); 6043 PetscCall(PetscFree(bufa_ptr)); 6044 } else { 6045 *startsj_s = sstartsj; 6046 *startsj_r = rstartsj; 6047 *bufa_ptr = bufa; 6048 } 6049 } else if (scall == MAT_REUSE_MATRIX) { 6050 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6051 } 6052 6053 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6054 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6055 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6056 PetscFunctionReturn(PETSC_SUCCESS); 6057 } 6058 6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6062 #if defined(PETSC_HAVE_MKL_SPARSE) 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6064 #endif 6065 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6067 #if defined(PETSC_HAVE_ELEMENTAL) 6068 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6069 #endif 6070 #if defined(PETSC_HAVE_SCALAPACK) 6071 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6072 #endif 6073 #if defined(PETSC_HAVE_HYPRE) 6074 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6075 #endif 6076 #if defined(PETSC_HAVE_CUDA) 6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6078 #endif 6079 #if defined(PETSC_HAVE_HIP) 6080 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6081 #endif 6082 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6084 #endif 6085 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6086 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6087 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6088 6089 /* 6090 Computes (B'*A')' since computing B*A directly is untenable 6091 6092 n p p 6093 [ ] [ ] [ ] 6094 m [ A ] * n [ B ] = m [ C ] 6095 [ ] [ ] [ ] 6096 6097 */ 6098 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6099 { 6100 Mat At, Bt, Ct; 6101 6102 PetscFunctionBegin; 6103 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6104 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6105 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6106 PetscCall(MatDestroy(&At)); 6107 PetscCall(MatDestroy(&Bt)); 6108 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6109 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6110 PetscCall(MatDestroy(&Ct)); 6111 PetscFunctionReturn(PETSC_SUCCESS); 6112 } 6113 6114 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6115 { 6116 PetscBool cisdense; 6117 6118 PetscFunctionBegin; 6119 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6120 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6121 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6122 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6123 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6124 PetscCall(MatSetUp(C)); 6125 6126 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6127 PetscFunctionReturn(PETSC_SUCCESS); 6128 } 6129 6130 /* ----------------------------------------------------------------*/ 6131 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6132 { 6133 Mat_Product *product = C->product; 6134 Mat A = product->A, B = product->B; 6135 6136 PetscFunctionBegin; 6137 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6138 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6139 6140 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6141 C->ops->productsymbolic = MatProductSymbolic_AB; 6142 PetscFunctionReturn(PETSC_SUCCESS); 6143 } 6144 6145 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6146 { 6147 Mat_Product *product = C->product; 6148 6149 PetscFunctionBegin; 6150 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6151 PetscFunctionReturn(PETSC_SUCCESS); 6152 } 6153 6154 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6155 6156 Input Parameters: 6157 6158 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6159 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6160 6161 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6162 6163 For Set1, j1[] contains column indices of the nonzeros. 6164 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6165 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6166 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6167 6168 Similarly for Set2. 6169 6170 This routine merges the two sets of nonzeros row by row and removes repeats. 6171 6172 Output Parameters: (memory is allocated by the caller) 6173 6174 i[],j[]: the CSR of the merged matrix, which has m rows. 6175 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6176 imap2[]: similar to imap1[], but for Set2. 6177 Note we order nonzeros row-by-row and from left to right.
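   For illustration only (a made-up example, not taken from the implementation or a test), consider a 1-row matrix with
     Set1: j1 = {2,2,5}, rowBegin1 = {0}, rowEnd1 = {3}, jmap1 = {0,2,3}  (unique columns 2 and 5)
     Set2: j2 = {2,7},   rowBegin2 = {0}, rowEnd2 = {2}, jmap2 = {0,1,2}  (unique columns 2 and 7)
   Then the merged CSR is i = {0,3}, j = {2,5,7}, and the routine fills imap1 = {0,1}, imap2 = {0,2},
   i.e., Set1's unique nonzeros become entries 0 and 1 of the merged row while Set2's become entries 0 and 2.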
6178 */ 6179 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6180 { 6181 PetscInt r, m; /* Row index of mat */ 6182 PetscCount t, t1, t2, b1, e1, b2, e2; 6183 6184 PetscFunctionBegin; 6185 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6186 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6187 i[0] = 0; 6188 for (r = 0; r < m; r++) { /* Do row-by-row merging */ 6189 b1 = rowBegin1[r]; 6190 e1 = rowEnd1[r]; 6191 b2 = rowBegin2[r]; 6192 e2 = rowEnd2[r]; 6193 while (b1 < e1 && b2 < e2) { 6194 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6195 j[t] = j1[b1]; 6196 imap1[t1] = t; 6197 imap2[t2] = t; 6198 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero of Set1 */ 6199 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero of Set2 */ 6200 t1++; 6201 t2++; 6202 t++; 6203 } else if (j1[b1] < j2[b2]) { 6204 j[t] = j1[b1]; 6205 imap1[t1] = t; 6206 b1 += jmap1[t1 + 1] - jmap1[t1]; 6207 t1++; 6208 t++; 6209 } else { 6210 j[t] = j2[b2]; 6211 imap2[t2] = t; 6212 b2 += jmap2[t2 + 1] - jmap2[t2]; 6213 t2++; 6214 t++; 6215 } 6216 } 6217 /* Merge the remaining in either j1[] or j2[] */ 6218 while (b1 < e1) { 6219 j[t] = j1[b1]; 6220 imap1[t1] = t; 6221 b1 += jmap1[t1 + 1] - jmap1[t1]; 6222 t1++; 6223 t++; 6224 } 6225 while (b2 < e2) { 6226 j[t] = j2[b2]; 6227 imap2[t2] = t; 6228 b2 += jmap2[t2 + 1] - jmap2[t2]; 6229 t2++; 6230 t++; 6231 } 6232 i[r + 1] = t; 6233 } 6234 PetscFunctionReturn(PETSC_SUCCESS); 6235 } 6236 6237 /* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6238 6239 Input Parameters: 6240 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6241 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6242 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6243 6244 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6245 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6246 6247 Output Parameters: 6248 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6249 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6250 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6251 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6252 6253 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6254 Atot: number of entries belonging to the diagonal block. 6255 Annz: number of unique nonzeros belonging to the diagonal block. 6256 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6257 repeats (i.e., same 'i,j' pair). 6258 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6259 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6260 6261 Atot: number of entries belonging to the diagonal block 6262 Annz: number of unique nonzeros belonging to the diagonal block. 6263 6264 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6265 6266 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6267 */ 6268 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6269 { 6270 PetscInt cstart, cend, rstart, rend, row, col; 6271 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6272 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6273 PetscCount k, m, p, q, r, s, mid; 6274 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6275 6276 PetscFunctionBegin; 6277 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6278 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6279 m = rend - rstart; 6280 6281 for (k = 0; k < n; k++) { 6282 if (i[k] >= 0) break; 6283 } /* Skip negative rows */ 6284 6285 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6286 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6287 */ 6288 while (k < n) { 6289 row = i[k]; 6290 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6291 for (s = k; s < n; s++) 6292 if (i[s] != row) break; 6293 for (p = k; p < s; p++) { 6294 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6295 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6296 } 6297 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6298 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6299 rowBegin[row - rstart] = k; 6300 rowMid[row - rstart] = mid; 6301 rowEnd[row - rstart] = s; 6302 6303 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6304 Atot += mid - k; 6305 Btot += s - mid; 6306 6307 /* Count unique nonzeros of this diag/offdiag row */ 6308 for (p = k; p < mid;) { 6309 col = j[p]; 6310 do { 6311 j[p] += PETSC_MAX_INT; 6312 p++; 6313 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6314 Annz++; 6315 } 6316 6317 for (p = mid; p < s;) { 6318 col = j[p]; 6319 do { 6320 p++; 6321 } while (p < s && j[p] == col); 6322 Bnnz++; 6323 } 6324 k = s; 6325 } 6326 6327 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6328 PetscCall(PetscMalloc1(Atot, &Aperm)); 6329 PetscCall(PetscMalloc1(Btot, &Bperm)); 6330 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6331 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6332 6333 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6334 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6335 for (r = 0; r < m; r++) { 6336 k = rowBegin[r]; 6337 mid = rowMid[r]; 6338 s = rowEnd[r]; 6339 
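   /* Append this row's diagonal-block perm[] values to Aperm[] and its off-diagonal-block perm[] values to Bperm[] */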
PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6340 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6341 Atot += mid - k; 6342 Btot += s - mid; 6343 6344 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6345 for (p = k; p < mid;) { 6346 col = j[p]; 6347 q = p; 6348 do { 6349 p++; 6350 } while (p < mid && j[p] == col); 6351 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6352 Annz++; 6353 } 6354 6355 for (p = mid; p < s;) { 6356 col = j[p]; 6357 q = p; 6358 do { 6359 p++; 6360 } while (p < s && j[p] == col); 6361 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6362 Bnnz++; 6363 } 6364 } 6365 /* Output */ 6366 *Aperm_ = Aperm; 6367 *Annz_ = Annz; 6368 *Atot_ = Atot; 6369 *Ajmap_ = Ajmap; 6370 *Bperm_ = Bperm; 6371 *Bnnz_ = Bnnz; 6372 *Btot_ = Btot; 6373 *Bjmap_ = Bjmap; 6374 PetscFunctionReturn(PETSC_SUCCESS); 6375 } 6376 6377 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6378 6379 Input Parameters: 6380 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6381 nnz: number of unique nonzeros in the merged matrix 6382 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6383 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6384 6385 Output Parameter: (memory is allocated by the caller) 6386 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6387 6388 Example: 6389 nnz1 = 4 6390 nnz = 6 6391 imap = [1,3,4,5] 6392 jmap = [0,3,5,6,7] 6393 then, 6394 jmap_new = [0,0,3,3,5,6,7] 6395 */ 6396 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6397 { 6398 PetscCount k, p; 6399 6400 PetscFunctionBegin; 6401 jmap_new[0] = 0; 6402 p = nnz; /* p loops over jmap_new[] backwards */ 6403 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6404 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6405 } 6406 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6407 PetscFunctionReturn(PETSC_SUCCESS); 6408 } 6409 6410 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6411 { 6412 MPI_Comm comm; 6413 PetscMPIInt rank, size; 6414 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6415 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6416 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6417 6418 PetscFunctionBegin; 6419 PetscCall(PetscFree(mpiaij->garray)); 6420 PetscCall(VecDestroy(&mpiaij->lvec)); 6421 #if defined(PETSC_USE_CTABLE) 6422 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6423 #else 6424 PetscCall(PetscFree(mpiaij->colmap)); 6425 #endif 6426 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6427 mat->assembled = PETSC_FALSE; 6428 mat->was_assembled = PETSC_FALSE; 6429 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6430 6431 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6432 PetscCallMPI(MPI_Comm_size(comm, &size)); 6433 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6434 PetscCall(PetscLayoutSetUp(mat->rmap)); 6435 PetscCall(PetscLayoutSetUp(mat->cmap)); 6436 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6437 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6438 PetscCall(MatGetLocalSize(mat, &m, &n)); 6439 PetscCall(MatGetSize(mat, &M, &N)); 6440 6441 /* 
---------------------------------------------------------------------------*/ 6442 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6443 /* entries come first, then local rows, then remote rows. */ 6444 /* ---------------------------------------------------------------------------*/ 6445 PetscCount n1 = coo_n, *perm1; 6446 PetscInt *i1 = coo_i, *j1 = coo_j; 6447 6448 PetscCall(PetscMalloc1(n1, &perm1)); 6449 for (k = 0; k < n1; k++) perm1[k] = k; 6450 6451 /* Manipulate indices so that entries with negative row or col indices will have smallest 6452 row indices, local entries will have greater but negative row indices, and remote entries 6453 will have positive row indices. 6454 */ 6455 for (k = 0; k < n1; k++) { 6456 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6457 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6458 else { 6459 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6460 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6461 } 6462 } 6463 6464 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6465 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6466 for (k = 0; k < n1; k++) { 6467 if (i1[k] > PETSC_MIN_INT) break; 6468 } /* Advance k to the first entry we need to take care of */ 6469 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6470 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6471 6472 /* ---------------------------------------------------------------------------*/ 6473 /* Split local rows into diag/offdiag portions */ 6474 /* ---------------------------------------------------------------------------*/ 6475 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6476 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6477 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6478 6479 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6480 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6481 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6482 6483 /* ---------------------------------------------------------------------------*/ 6484 /* Send remote rows to their owner */ 6485 /* ---------------------------------------------------------------------------*/ 6486 /* Find which rows should be sent to which remote ranks*/ 6487 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6488 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6489 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6490 const PetscInt *ranges; 6491 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6492 6493 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6494 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6495 for (k = rem; k < n1;) { 6496 PetscMPIInt owner; 6497 PetscInt firstRow, lastRow; 6498 6499 /* Locate a row range */ 6500 firstRow = i1[k]; /* first row of this owner */ 6501 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6502 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6503 6504 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */ 6505 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6506 6507 /* All entries in [k,p) belong to this remote owner */ 6508 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6509 PetscMPIInt *sendto2; 6510 PetscInt *nentries2; 6511 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6512 6513 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6514 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6515 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6516 PetscCall(PetscFree2(sendto, nentries)); 6517 sendto = sendto2; 6518 nentries = nentries2; 6519 maxNsend = maxNsend2; 6520 } 6521 sendto[nsend] = owner; 6522 nentries[nsend] = p - k; 6523 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6524 nsend++; 6525 k = p; 6526 } 6527 6528 /* Build 1st SF to know offsets on remote ranks where we will send data */ 6529 PetscSF sf1; 6530 PetscInt nroots = 1, nroots2 = 0; 6531 PetscInt nleaves = nsend, nleaves2 = 0; 6532 PetscInt *offsets; 6533 PetscSFNode *iremote; 6534 6535 PetscCall(PetscSFCreate(comm, &sf1)); 6536 PetscCall(PetscMalloc1(nsend, &iremote)); 6537 PetscCall(PetscMalloc1(nsend, &offsets)); 6538 for (k = 0; k < nsend; k++) { 6539 iremote[k].rank = sendto[k]; 6540 iremote[k].index = 0; 6541 nleaves2 += nentries[k]; 6542 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6543 } 6544 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6545 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6546 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we detect it via the offsets[] check below */ 6547 PetscCall(PetscSFDestroy(&sf1)); 6548 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6549 6550 /* Build 2nd SF to send remote COOs to their owner */ 6551 PetscSF sf2; 6552 nroots = nroots2; 6553 nleaves = nleaves2; 6554 PetscCall(PetscSFCreate(comm, &sf2)); 6555 PetscCall(PetscSFSetFromOptions(sf2)); 6556 PetscCall(PetscMalloc1(nleaves, &iremote)); 6557 p = 0; 6558 for (k = 0; k < nsend; k++) { 6559 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6560 for (q = 0; q < nentries[k]; q++, p++) { 6561 iremote[p].rank = sendto[k]; 6562 iremote[p].index = offsets[k] + q; 6563 } 6564 } 6565 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6566 6567 /* sf2 only sends contiguous leafdata to contiguous rootdata.
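   (Purely as an illustration: if this rank sends 3 entries to rank 1 and 2 entries to rank 4, the fetch-and-add above returns offsets[0] and offsets[1], the starting slots of those two blocks in the owners' receive buffers, and leaf p of sf2 then targets root index offsets[k] + q on rank sendto[k].)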
We record the permutation which will be used to fill leafdata */ 6568 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6569 6570 /* Send the remote COOs to their owner */ 6571 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6572 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6573 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6574 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6575 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6576 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6577 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6578 6579 PetscCall(PetscFree(offsets)); 6580 PetscCall(PetscFree2(sendto, nentries)); 6581 6582 /* ---------------------------------------------------------------*/ 6583 /* Sort received COOs by row along with the permutation array */ 6584 /* ---------------------------------------------------------------*/ 6585 for (k = 0; k < n2; k++) perm2[k] = k; 6586 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6587 6588 /* ---------------------------------------------------------------*/ 6589 /* Split received COOs into diag/offdiag portions */ 6590 /* ---------------------------------------------------------------*/ 6591 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6592 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6593 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6594 6595 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6596 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6597 6598 /* --------------------------------------------------------------------------*/ 6599 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6600 /* --------------------------------------------------------------------------*/ 6601 PetscInt *Ai, *Bi; 6602 PetscInt *Aj, *Bj; 6603 6604 PetscCall(PetscMalloc1(m + 1, &Ai)); 6605 PetscCall(PetscMalloc1(m + 1, &Bi)); 6606 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6607 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6608 6609 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6610 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6611 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6612 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6613 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6614 6615 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6616 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6617 6618 /* --------------------------------------------------------------------------*/ 6619 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6620 /* expect nonzeros in A/B most likely have local contributing entries */ 6621 /* --------------------------------------------------------------------------*/ 6622 PetscInt Annz = Ai[m]; 6623 PetscInt Bnnz = Bi[m]; 6624 PetscCount *Ajmap1_new, *Bjmap1_new; 6625 6626 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6627 PetscCall(PetscMalloc1(Bnnz + 1, 
&Bjmap1_new)); 6628 6629 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6630 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6631 6632 PetscCall(PetscFree(Aimap1)); 6633 PetscCall(PetscFree(Ajmap1)); 6634 PetscCall(PetscFree(Bimap1)); 6635 PetscCall(PetscFree(Bjmap1)); 6636 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6637 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6638 PetscCall(PetscFree(perm1)); 6639 PetscCall(PetscFree3(i2, j2, perm2)); 6640 6641 Ajmap1 = Ajmap1_new; 6642 Bjmap1 = Bjmap1_new; 6643 6644 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6645 if (Annz < Annz1 + Annz2) { 6646 PetscInt *Aj_new; 6647 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6648 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6649 PetscCall(PetscFree(Aj)); 6650 Aj = Aj_new; 6651 } 6652 6653 if (Bnnz < Bnnz1 + Bnnz2) { 6654 PetscInt *Bj_new; 6655 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6656 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6657 PetscCall(PetscFree(Bj)); 6658 Bj = Bj_new; 6659 } 6660 6661 /* --------------------------------------------------------------------------------*/ 6662 /* Create new submatrices for on-process and off-process coupling */ 6663 /* --------------------------------------------------------------------------------*/ 6664 PetscScalar *Aa, *Ba; 6665 MatType rtype; 6666 Mat_SeqAIJ *a, *b; 6667 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6668 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6669 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6670 if (cstart) { 6671 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6672 } 6673 PetscCall(MatDestroy(&mpiaij->A)); 6674 PetscCall(MatDestroy(&mpiaij->B)); 6675 PetscCall(MatGetRootType_Private(mat, &rtype)); 6676 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6677 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6678 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6679 6680 a = (Mat_SeqAIJ *)mpiaij->A->data; 6681 b = (Mat_SeqAIJ *)mpiaij->B->data; 6682 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6683 a->free_a = b->free_a = PETSC_TRUE; 6684 a->free_ij = b->free_ij = PETSC_TRUE; 6685 6686 /* conversion must happen AFTER multiply setup */ 6687 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6688 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6689 PetscCall(VecDestroy(&mpiaij->lvec)); 6690 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6691 6692 mpiaij->coo_n = coo_n; 6693 mpiaij->coo_sf = sf2; 6694 mpiaij->sendlen = nleaves; 6695 mpiaij->recvlen = nroots; 6696 6697 mpiaij->Annz = Annz; 6698 mpiaij->Bnnz = Bnnz; 6699 6700 mpiaij->Annz2 = Annz2; 6701 mpiaij->Bnnz2 = Bnnz2; 6702 6703 mpiaij->Atot1 = Atot1; 6704 mpiaij->Atot2 = Atot2; 6705 mpiaij->Btot1 = Btot1; 6706 mpiaij->Btot2 = Btot2; 6707 6708 mpiaij->Ajmap1 = Ajmap1; 6709 mpiaij->Aperm1 = Aperm1; 6710 6711 mpiaij->Bjmap1 = Bjmap1; 6712 mpiaij->Bperm1 = Bperm1; 6713 6714 mpiaij->Aimap2 = Aimap2; 6715 mpiaij->Ajmap2 = Ajmap2; 6716 mpiaij->Aperm2 = Aperm2; 6717 6718 mpiaij->Bimap2 = Bimap2; 6719 mpiaij->Bjmap2 = Bjmap2; 6720 mpiaij->Bperm2 = Bperm2; 6721 6722 mpiaij->Cperm1 = Cperm1; 6723 6724 /* Allocate in preallocation. 
If not used, it has zero cost on host */ 6725 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6726 PetscFunctionReturn(PETSC_SUCCESS); 6727 } 6728 6729 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6730 { 6731 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6732 Mat A = mpiaij->A, B = mpiaij->B; 6733 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6734 PetscScalar *Aa, *Ba; 6735 PetscScalar *sendbuf = mpiaij->sendbuf; 6736 PetscScalar *recvbuf = mpiaij->recvbuf; 6737 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6738 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6739 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6740 const PetscCount *Cperm1 = mpiaij->Cperm1; 6741 6742 PetscFunctionBegin; 6743 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6744 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6745 6746 /* Pack entries to be sent to remote */ 6747 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6748 6749 /* Send remote entries to their owner and overlap the communication with local computation */ 6750 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6751 /* Add local entries to A and B */ 6752 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6753 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6754 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6755 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6756 } 6757 for (PetscCount i = 0; i < Bnnz; i++) { 6758 PetscScalar sum = 0.0; 6759 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6760 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6761 } 6762 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6763 6764 /* Add received remote entries to A and B */ 6765 for (PetscCount i = 0; i < Annz2; i++) { 6766 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6767 } 6768 for (PetscCount i = 0; i < Bnnz2; i++) { 6769 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6770 } 6771 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6772 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6773 PetscFunctionReturn(PETSC_SUCCESS); 6774 } 6775 6776 /*MC 6777 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6778 6779 Options Database Keys: 6780 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6781 6782 Level: beginner 6783 6784 Notes: 6785 `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values, 6786 in this case the values associated with the rows and columns one passes in are set to zero 6787 in the matrix 6788 6789 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6790 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6791 6792 .seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6793 M*/ 6794 6795 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6796 { 6797 Mat_MPIAIJ *b; 6798 PetscMPIInt size; 6799 6800 PetscFunctionBegin; 6801 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6802 6803 PetscCall(PetscNew(&b)); 6804 B->data = (void *)b; 6805 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6806 B->assembled = PETSC_FALSE; 6807 B->insertmode = NOT_SET_VALUES; 6808 b->size = size; 6809 6810 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6811 6812 /* build cache for off array entries formed */ 6813 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6814 6815 b->donotstash = PETSC_FALSE; 6816 b->colmap = NULL; 6817 b->garray = NULL; 6818 b->roworiented = PETSC_TRUE; 6819 6820 /* stuff used for matrix vector multiply */ 6821 b->lvec = NULL; 6822 b->Mvctx = NULL; 6823 6824 /* stuff for MatGetRow() */ 6825 b->rowindices = NULL; 6826 b->rowvalues = NULL; 6827 b->getrowactive = PETSC_FALSE; 6828 6829 /* flexible pointer used in CUSPARSE classes */ 6830 b->spptr = NULL; 6831 6832 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6842 #if defined(PETSC_HAVE_CUDA) 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6844 #endif 6845 #if defined(PETSC_HAVE_HIP) 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6847 #endif 6848 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6850 #endif 6851 #if defined(PETSC_HAVE_MKL_SPARSE) 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6853 #endif 6854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6858 #if defined(PETSC_HAVE_ELEMENTAL) 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6860 #endif 6861 #if defined(PETSC_HAVE_SCALAPACK) 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6863 #endif 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6866 #if defined(PETSC_HAVE_HYPRE) 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6869 #endif 6870 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6872 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6874 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6875 PetscFunctionReturn(PETSC_SUCCESS); 6876 } 6877 6878 /*@C 6879 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6880 and "off-diagonal" part of the matrix in CSR format. 6881 6882 Collective 6883 6884 Input Parameters: 6885 + comm - MPI communicator 6886 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6887 . n - This value should be the same as the local size used in creating the 6888 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6889 calculated if N is given) For square matrices n is almost always m. 6890 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 6891 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 6892 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6893 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6894 . a - matrix values 6895 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6896 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6897 - oa - matrix values 6898 6899 Output Parameter: 6900 . mat - the matrix 6901 6902 Level: advanced 6903 6904 Notes: 6905 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6906 must free the arrays once the matrix has been destroyed and not before. 6907 6908 The i and j indices are 0 based 6909 6910 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6911 6912 This sets local rows and cannot be used to set off-processor values. 6913 6914 Use of this routine is discouraged because it is inflexible and cumbersome to use. 
It is extremely rare that a 6915 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6916 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6917 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6918 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6919 communication if it is known that only local entries will be set. 6920 6921 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6922 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6923 @*/ 6924 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6925 { 6926 Mat_MPIAIJ *maij; 6927 6928 PetscFunctionBegin; 6929 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6930 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6931 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6932 PetscCall(MatCreate(comm, mat)); 6933 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6934 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6935 maij = (Mat_MPIAIJ *)(*mat)->data; 6936 6937 (*mat)->preallocated = PETSC_TRUE; 6938 6939 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6940 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6941 6942 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6943 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6944 6945 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6946 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6947 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6948 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6949 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6950 PetscFunctionReturn(PETSC_SUCCESS); 6951 } 6952 6953 typedef struct { 6954 Mat *mp; /* intermediate products */ 6955 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6956 PetscInt cp; /* number of intermediate products */ 6957 6958 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6959 PetscInt *startsj_s, *startsj_r; 6960 PetscScalar *bufa; 6961 Mat P_oth; 6962 6963 /* may take advantage of merging product->B */ 6964 Mat Bloc; /* B-local by merging diag and off-diag */ 6965 6966 /* cusparse does not have support to split between symbolic and numeric phases. 6967 When api_user is true, we don't need to update the numerical values 6968 of the temporary storage */ 6969 PetscBool reusesym; 6970 6971 /* support for COO values insertion */ 6972 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6973 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6974 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6975 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6976 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6977 PetscMemType mtype; 6978 6979 /* customization */ 6980 PetscBool abmerge; 6981 PetscBool P_oth_bind; 6982 } MatMatMPIAIJBACKEND; 6983 6984 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6985 { 6986 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6987 PetscInt i; 6988 6989 PetscFunctionBegin; 6990 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6991 PetscCall(PetscFree(mmdata->bufa)); 6992 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6993 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6994 PetscCall(MatDestroy(&mmdata->P_oth)); 6995 PetscCall(MatDestroy(&mmdata->Bloc)); 6996 PetscCall(PetscSFDestroy(&mmdata->sf)); 6997 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6998 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6999 PetscCall(PetscFree(mmdata->own[0])); 7000 PetscCall(PetscFree(mmdata->own)); 7001 PetscCall(PetscFree(mmdata->off[0])); 7002 PetscCall(PetscFree(mmdata->off)); 7003 PetscCall(PetscFree(mmdata)); 7004 PetscFunctionReturn(PETSC_SUCCESS); 7005 } 7006 7007 /* Copy selected n entries with indices in idx[] of A to v[]. 7008 If idx is NULL, copy the whole data array of A to v[] 7009 */ 7010 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7011 { 7012 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7013 7014 PetscFunctionBegin; 7015 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7016 if (f) { 7017 PetscCall((*f)(A, n, idx, v)); 7018 } else { 7019 const PetscScalar *vv; 7020 7021 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7022 if (n && idx) { 7023 PetscScalar *w = v; 7024 const PetscInt *oi = idx; 7025 PetscInt j; 7026 7027 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7028 } else { 7029 PetscCall(PetscArraycpy(v, vv, n)); 7030 } 7031 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7032 } 7033 PetscFunctionReturn(PETSC_SUCCESS); 7034 } 7035 7036 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7037 { 7038 MatMatMPIAIJBACKEND *mmdata; 7039 PetscInt i, n_d, n_o; 7040 7041 PetscFunctionBegin; 7042 MatCheckProduct(C, 1); 7043 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7044 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7045 if (!mmdata->reusesym) { /* update temporary matrices */ 7046 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7047 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7048 } 7049 mmdata->reusesym = PETSC_FALSE; 7050 7051 for (i = 0; i < mmdata->cp; i++) { 7052 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7053 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7054 } 7055 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7056 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7057 7058 if (mmdata->mptmp[i]) continue; 7059 if (noff) { 7060 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7061 7062 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7063 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7064 n_o += noff; 7065 n_d += nown; 7066 } else { 7067 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7068 7069 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7070 n_d += mm->nz; 7071 } 7072 } 7073 if (mmdata->hasoffproc) { /* offprocess insertion */ 7074 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7075 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7076 } 7077 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7078 PetscFunctionReturn(PETSC_SUCCESS); 7079 } 7080 7081 /* Support for Pt * A, A * P, or Pt * A * P */ 7082 #define MAX_NUMBER_INTERMEDIATE 4 7083 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7084 { 7085 Mat_Product *product = C->product; 7086 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7087 Mat_MPIAIJ *a, *p; 7088 MatMatMPIAIJBACKEND *mmdata; 7089 ISLocalToGlobalMapping P_oth_l2g = NULL; 7090 IS glob = NULL; 7091 const char *prefix; 7092 char pprefix[256]; 7093 const PetscInt *globidx, *P_oth_idx; 7094 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7095 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7096 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7097 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7098 /* a base offset; type-2: sparse with a local to global map table */ 7099 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7100 7101 MatProductType ptype; 7102 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7103 PetscMPIInt size; 7104 7105 PetscFunctionBegin; 7106 MatCheckProduct(C, 1); 7107 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7108 ptype = product->type; 7109 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7110 ptype = MATPRODUCT_AB; 7111 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7112 } 7113 switch (ptype) { 7114 case MATPRODUCT_AB: 7115 A = product->A; 7116 P = product->B; 7117 m = A->rmap->n; 7118 n = P->cmap->n; 7119 M = A->rmap->N; 7120 N = P->cmap->N; 7121 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7122 break; 7123 case MATPRODUCT_AtB: 7124 P = product->A; 7125 A = product->B; 7126 m = P->cmap->n; 7127 n = A->cmap->n; 7128 M = P->cmap->N; 7129 N = A->cmap->N; 7130 hasoffproc = PETSC_TRUE; 7131 break; 7132 case MATPRODUCT_PtAP: 7133 A = product->A; 7134 P = product->B; 7135 m = P->cmap->n; 7136 n = P->cmap->n; 7137 M = P->cmap->N; 7138 N = P->cmap->N; 7139 hasoffproc = PETSC_TRUE; 7140 break; 7141 default: 7142 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7143 } 7144 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7145 if (size == 1) hasoffproc = PETSC_FALSE; 7146 7147 /* defaults */ 7148 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7149 mp[i] = NULL; 7150 mptmp[i] = PETSC_FALSE; 7151 rmapt[i] = -1; 7152 cmapt[i] = -1; 7153 rmapa[i] = NULL; 7154 cmapa[i] = NULL; 7155 } 7156 7157 /* customization */ 7158 PetscCall(PetscNew(&mmdata)); 7159 mmdata->reusesym = product->api_user; 7160 if (ptype == MATPRODUCT_AB) { 7161 if (product->api_user) { 7162 
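      /* api_user: the product was requested through MatMatMult(), so the options are advertised under the -matmatmult_ prefix */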
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7163 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7164 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7165 PetscOptionsEnd(); 7166 } else { 7167 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7168 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7169 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7170 PetscOptionsEnd(); 7171 } 7172 } else if (ptype == MATPRODUCT_PtAP) { 7173 if (product->api_user) { 7174 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7175 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7176 PetscOptionsEnd(); 7177 } else { 7178 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7179 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7180 PetscOptionsEnd(); 7181 } 7182 } 7183 a = (Mat_MPIAIJ *)A->data; 7184 p = (Mat_MPIAIJ *)P->data; 7185 PetscCall(MatSetSizes(C, m, n, M, N)); 7186 PetscCall(PetscLayoutSetUp(C->rmap)); 7187 PetscCall(PetscLayoutSetUp(C->cmap)); 7188 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7189 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7190 7191 cp = 0; 7192 switch (ptype) { 7193 case MATPRODUCT_AB: /* A * P */ 7194 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7195 7196 /* A_diag * P_local (merged or not) */ 7197 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7198 /* P is product->B */ 7199 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7200 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7201 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7202 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7203 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7204 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7205 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7206 mp[cp]->product->api_user = product->api_user; 7207 PetscCall(MatProductSetFromOptions(mp[cp])); 7208 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7209 PetscCall(ISGetIndices(glob, &globidx)); 7210 rmapt[cp] = 1; 7211 cmapt[cp] = 2; 7212 cmapa[cp] = globidx; 7213 mptmp[cp] = PETSC_FALSE; 7214 cp++; 7215 } else { /* A_diag * P_diag and A_diag * P_off */ 7216 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7217 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7218 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7219 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7220 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7221 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7222 mp[cp]->product->api_user = 
product->api_user; 7223 PetscCall(MatProductSetFromOptions(mp[cp])); 7224 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7225 rmapt[cp] = 1; 7226 cmapt[cp] = 1; 7227 mptmp[cp] = PETSC_FALSE; 7228 cp++; 7229 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7230 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7231 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7232 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7233 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7234 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7235 mp[cp]->product->api_user = product->api_user; 7236 PetscCall(MatProductSetFromOptions(mp[cp])); 7237 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7238 rmapt[cp] = 1; 7239 cmapt[cp] = 2; 7240 cmapa[cp] = p->garray; 7241 mptmp[cp] = PETSC_FALSE; 7242 cp++; 7243 } 7244 7245 /* A_off * P_other */ 7246 if (mmdata->P_oth) { 7247 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7248 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7249 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7250 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7251 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7252 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7253 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7254 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7255 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7256 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7257 mp[cp]->product->api_user = product->api_user; 7258 PetscCall(MatProductSetFromOptions(mp[cp])); 7259 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7260 rmapt[cp] = 1; 7261 cmapt[cp] = 2; 7262 cmapa[cp] = P_oth_idx; 7263 mptmp[cp] = PETSC_FALSE; 7264 cp++; 7265 } 7266 break; 7267 7268 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7269 /* A is product->B */ 7270 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7271 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7272 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7273 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7274 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7275 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7276 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7277 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7278 mp[cp]->product->api_user = product->api_user; 7279 PetscCall(MatProductSetFromOptions(mp[cp])); 7280 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7281 PetscCall(ISGetIndices(glob, &globidx)); 7282 rmapt[cp] = 2; 7283 rmapa[cp] = globidx; 7284 cmapt[cp] = 2; 7285 cmapa[cp] = globidx; 7286 mptmp[cp] = PETSC_FALSE; 7287 cp++; 7288 } else { 7289 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7290 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7291 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7292 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7293 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7294 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7295 mp[cp]->product->api_user = product->api_user; 7296 PetscCall(MatProductSetFromOptions(mp[cp])); 7297 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7298 PetscCall(ISGetIndices(glob, &globidx)); 
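      /* The map types recorded below reflect how this intermediate (P_diag^T * A_loc, with A_loc the merged local
         rows of A held in Bloc) lines up with C: its rows are the locally owned rows of C, hence consecutive with a
         base offset (type-1), while its columns follow Bloc's merged local numbering and are translated to global
         indices through glob (type-2). */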
7299 rmapt[cp] = 1; 7300 cmapt[cp] = 2; 7301 cmapa[cp] = globidx; 7302 mptmp[cp] = PETSC_FALSE; 7303 cp++; 7304 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7305 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7306 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7307 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7308 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7309 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7310 mp[cp]->product->api_user = product->api_user; 7311 PetscCall(MatProductSetFromOptions(mp[cp])); 7312 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7313 rmapt[cp] = 2; 7314 rmapa[cp] = p->garray; 7315 cmapt[cp] = 2; 7316 cmapa[cp] = globidx; 7317 mptmp[cp] = PETSC_FALSE; 7318 cp++; 7319 } 7320 break; 7321 case MATPRODUCT_PtAP: 7322 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7323 /* P is product->B */ 7324 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7325 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7326 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7327 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7328 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7329 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7330 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7331 mp[cp]->product->api_user = product->api_user; 7332 PetscCall(MatProductSetFromOptions(mp[cp])); 7333 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7334 PetscCall(ISGetIndices(glob, &globidx)); 7335 rmapt[cp] = 2; 7336 rmapa[cp] = globidx; 7337 cmapt[cp] = 2; 7338 cmapa[cp] = globidx; 7339 mptmp[cp] = PETSC_FALSE; 7340 cp++; 7341 if (mmdata->P_oth) { 7342 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7343 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7344 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7345 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7346 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7347 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7348 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7349 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7350 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7351 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7352 mp[cp]->product->api_user = product->api_user; 7353 PetscCall(MatProductSetFromOptions(mp[cp])); 7354 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7355 mptmp[cp] = PETSC_TRUE; 7356 cp++; 7357 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7366 rmapt[cp] = 2; 7367 rmapa[cp] = globidx; 7368 cmapt[cp] = 2; 7369 cmapa[cp] = P_oth_idx; 7370 mptmp[cp] = PETSC_FALSE; 7371 cp++; 7372 } 7373 break; 7374 default: 7375 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]);
7376   }
7377   /* sanity check */
7378   if (size > 1)
7379     for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);
7380
7381   PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
7382   for (i = 0; i < cp; i++) {
7383     mmdata->mp[i]    = mp[i];
7384     mmdata->mptmp[i] = mptmp[i];
7385   }
7386   mmdata->cp             = cp;
7387   C->product->data       = mmdata;
7388   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
7389   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
7390
7391   /* memory type */
7392   mmdata->mtype = PETSC_MEMTYPE_HOST;
7393   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
7394   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
7395   PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
7396   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
7397   else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
7398   else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;
7399
7400   /* prepare coo coordinates for values insertion */
7401
7402   /* count total nonzeros of those intermediate seqaij Mats
7403     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
7404     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
7405     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
7406   */
7407   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
7408     Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
7409     if (mptmp[cp]) continue;
7410     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
7411       const PetscInt *rmap = rmapa[cp];
7412       const PetscInt  mr   = mp[cp]->rmap->n;
7413       const PetscInt  rs   = C->rmap->rstart;
7414       const PetscInt  re   = C->rmap->rend;
7415       const PetscInt *ii   = mm->i;
7416       for (i = 0; i < mr; i++) {
7417         const PetscInt gr = rmap[i];
7418         const PetscInt nz = ii[i + 1] - ii[i];
7419         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
7420         else ncoo_oown += nz;                  /* this row is local */
7421       }
7422     } else ncoo_d += mm->nz;
7423   }
7424
7425   /*
7426     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
7427
7428     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted to me by other procs.
7429
7430     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].
7431
7432     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
7433     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
7434     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
7435
7436     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
7437       Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
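      A made-up illustration (the sizes are arbitrary and only meant to show the layout): with cp = 2, hasoffproc
      true, and both intermediates using a type-2 row map, suppose mp[0] has 3 nonzeros destined for other processes
      and 5 kept locally, while mp[1] has 0 and 4 respectively. Then ncoo_o = 3, ncoo_oown = 9, and after the setup
      below
        off[1]-off[0] = 3, off[2]-off[1] = 0   (locations, within mp[p]'s value array, of entries to be sent away)
        own[1]-own[0] = 5, own[2]-own[1] = 4   (locations of entries to be inserted locally)
      coo_i/j/v then hold the ncoo_d + ncoo_oown locally inserted entries first, followed by whatever this process
      receives from others through the PetscSF gather performed in MatProductNumeric_MPIAIJBACKEND().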
7438 */ 7439 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7440 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7441 7442 /* gather (i,j) of nonzeros inserted by remote procs */ 7443 if (hasoffproc) { 7444 PetscSF msf; 7445 PetscInt ncoo2, *coo_i2, *coo_j2; 7446 7447 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7448 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7449 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7450 7451 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7452 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7453 PetscInt *idxoff = mmdata->off[cp]; 7454 PetscInt *idxown = mmdata->own[cp]; 7455 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7456 const PetscInt *rmap = rmapa[cp]; 7457 const PetscInt *cmap = cmapa[cp]; 7458 const PetscInt *ii = mm->i; 7459 PetscInt *coi = coo_i + ncoo_o; 7460 PetscInt *coj = coo_j + ncoo_o; 7461 const PetscInt mr = mp[cp]->rmap->n; 7462 const PetscInt rs = C->rmap->rstart; 7463 const PetscInt re = C->rmap->rend; 7464 const PetscInt cs = C->cmap->rstart; 7465 for (i = 0; i < mr; i++) { 7466 const PetscInt *jj = mm->j + ii[i]; 7467 const PetscInt gr = rmap[i]; 7468 const PetscInt nz = ii[i + 1] - ii[i]; 7469 if (gr < rs || gr >= re) { /* this is an offproc row */ 7470 for (j = ii[i]; j < ii[i + 1]; j++) { 7471 *coi++ = gr; 7472 *idxoff++ = j; 7473 } 7474 if (!cmapt[cp]) { /* already global */ 7475 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7476 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7477 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7478 } else { /* offdiag */ 7479 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7480 } 7481 ncoo_o += nz; 7482 } else { /* this is a local row */ 7483 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7484 } 7485 } 7486 } 7487 mmdata->off[cp + 1] = idxoff; 7488 mmdata->own[cp + 1] = idxown; 7489 } 7490 7491 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7492 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7493 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7494 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7495 ncoo = ncoo_d + ncoo_oown + ncoo2; 7496 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7497 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7498 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7499 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7500 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7501 PetscCall(PetscFree2(coo_i, coo_j)); 7502 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7503 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7504 coo_i = coo_i2; 7505 coo_j = coo_j2; 7506 } else { /* no offproc values insertion */ 7507 ncoo = ncoo_d; 7508 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7509 7510 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7511 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7512 PetscCall(PetscSFSetUp(mmdata->sf)); 7513 } 7514 mmdata->hasoffproc = hasoffproc; 7515 7516 /* gather (i,j) of nonzeros 
inserted locally */ 7517 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7518 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7519 PetscInt *coi = coo_i + ncoo_d; 7520 PetscInt *coj = coo_j + ncoo_d; 7521 const PetscInt *jj = mm->j; 7522 const PetscInt *ii = mm->i; 7523 const PetscInt *cmap = cmapa[cp]; 7524 const PetscInt *rmap = rmapa[cp]; 7525 const PetscInt mr = mp[cp]->rmap->n; 7526 const PetscInt rs = C->rmap->rstart; 7527 const PetscInt re = C->rmap->rend; 7528 const PetscInt cs = C->cmap->rstart; 7529 7530 if (mptmp[cp]) continue; 7531 if (rmapt[cp] == 1) { /* consecutive rows */ 7532 /* fill coo_i */ 7533 for (i = 0; i < mr; i++) { 7534 const PetscInt gr = i + rs; 7535 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7536 } 7537 /* fill coo_j */ 7538 if (!cmapt[cp]) { /* type-0, already global */ 7539 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7540 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7541 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7542 } else { /* type-2, local to global for sparse columns */ 7543 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7544 } 7545 ncoo_d += mm->nz; 7546 } else if (rmapt[cp] == 2) { /* sparse rows */ 7547 for (i = 0; i < mr; i++) { 7548 const PetscInt *jj = mm->j + ii[i]; 7549 const PetscInt gr = rmap[i]; 7550 const PetscInt nz = ii[i + 1] - ii[i]; 7551 if (gr >= rs && gr < re) { /* local rows */ 7552 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7553 if (!cmapt[cp]) { /* type-0, already global */ 7554 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7555 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7556 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7557 } else { /* type-2, local to global for sparse columns */ 7558 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7559 } 7560 ncoo_d += nz; 7561 } 7562 } 7563 } 7564 } 7565 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7566 PetscCall(ISDestroy(&glob)); 7567 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7568 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7569 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7570 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7571 7572 /* preallocate with COO data */ 7573 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7574 PetscCall(PetscFree2(coo_i, coo_j)); 7575 PetscFunctionReturn(PETSC_SUCCESS); 7576 } 7577 7578 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7579 { 7580 Mat_Product *product = mat->product; 7581 #if defined(PETSC_HAVE_DEVICE) 7582 PetscBool match = PETSC_FALSE; 7583 PetscBool usecpu = PETSC_FALSE; 7584 #else 7585 PetscBool match = PETSC_TRUE; 7586 #endif 7587 7588 PetscFunctionBegin; 7589 MatCheckProduct(mat, 1); 7590 #if defined(PETSC_HAVE_DEVICE) 7591 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7592 if (match) { /* we can always fallback to the CPU if requested */ 7593 switch (product->type) { 7594 case MATPRODUCT_AB: 7595 if (product->api_user) { 7596 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7597 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7598 PetscOptionsEnd(); 7599 } else { 7600 
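            /* Same CPU fallback, but exposed under the type-independent MatProduct option name when the product was
               not created through MatMatMult(); passing, e.g., -mat_product_algorithm_backend_cpu 1 forces the plain
               MPIAIJ implementation selected at the bottom of this function. */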
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7601 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7602 PetscOptionsEnd(); 7603 } 7604 break; 7605 case MATPRODUCT_AtB: 7606 if (product->api_user) { 7607 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7608 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7609 PetscOptionsEnd(); 7610 } else { 7611 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7612 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7613 PetscOptionsEnd(); 7614 } 7615 break; 7616 case MATPRODUCT_PtAP: 7617 if (product->api_user) { 7618 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7619 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7620 PetscOptionsEnd(); 7621 } else { 7622 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7623 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7624 PetscOptionsEnd(); 7625 } 7626 break; 7627 default: 7628 break; 7629 } 7630 match = (PetscBool)!usecpu; 7631 } 7632 #endif 7633 if (match) { 7634 switch (product->type) { 7635 case MATPRODUCT_AB: 7636 case MATPRODUCT_AtB: 7637 case MATPRODUCT_PtAP: 7638 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7639 break; 7640 default: 7641 break; 7642 } 7643 } 7644 /* fallback to MPIAIJ ops */ 7645 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7646 PetscFunctionReturn(PETSC_SUCCESS); 7647 } 7648 7649 /* 7650 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7651 7652 n - the number of block indices in cc[] 7653 cc - the block indices (must be large enough to contain the indices) 7654 */ 7655 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7656 { 7657 PetscInt cnt = -1, nidx, j; 7658 const PetscInt *idx; 7659 7660 PetscFunctionBegin; 7661 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7662 if (nidx) { 7663 cnt = 0; 7664 cc[cnt] = idx[0] / bs; 7665 for (j = 1; j < nidx; j++) { 7666 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7667 } 7668 } 7669 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7670 *n = cnt + 1; 7671 PetscFunctionReturn(PETSC_SUCCESS); 7672 } 7673 7674 /* 7675 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7676 7677 ncollapsed - the number of block indices 7678 collapsed - the block indices (must be large enough to contain the indices) 7679 */ 7680 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7681 { 7682 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7683 7684 PetscFunctionBegin; 7685 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7686 for (i = start + 1; i < start + bs; i++) { 7687 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
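    /* Merge the sorted block-column indices of row i into the running set for this block row: the union is written
       into the 'merged' workspace, then the cprev/merged pointers are swapped so that cprev always holds the current
       union when the next row is processed. */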
7688 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7689 cprevtmp = cprev; 7690 cprev = merged; 7691 merged = cprevtmp; 7692 } 7693 *ncollapsed = nprev; 7694 if (collapsed) *collapsed = cprev; 7695 PetscFunctionReturn(PETSC_SUCCESS); 7696 } 7697 7698 /* 7699 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7700 */ 7701 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7702 { 7703 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7704 Mat tGmat; 7705 MPI_Comm comm; 7706 const PetscScalar *vals; 7707 const PetscInt *idx; 7708 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7709 MatScalar *AA; // this is checked in graph 7710 PetscBool isseqaij; 7711 Mat a, b, c; 7712 MatType jtype; 7713 7714 PetscFunctionBegin; 7715 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7716 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7717 PetscCall(MatGetType(Gmat, &jtype)); 7718 PetscCall(MatCreate(comm, &tGmat)); 7719 PetscCall(MatSetType(tGmat, jtype)); 7720 7721 /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold? 7722 Also, if the matrix is symmetric, can we skip this 7723 operation? It can be very expensive on large matrices. */ 7724 7725 // global sizes 7726 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7727 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7728 nloc = Iend - Istart; 7729 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7730 if (isseqaij) { 7731 a = Gmat; 7732 b = NULL; 7733 } else { 7734 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7735 a = d->A; 7736 b = d->B; 7737 garray = d->garray; 7738 } 7739 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7740 for (PetscInt row = 0; row < nloc; row++) { 7741 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7742 d_nnz[row] = ncols; 7743 if (ncols > maxcols) maxcols = ncols; 7744 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7745 } 7746 if (b) { 7747 for (PetscInt row = 0; row < nloc; row++) { 7748 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7749 o_nnz[row] = ncols; 7750 if (ncols > maxcols) maxcols = ncols; 7751 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7752 } 7753 } 7754 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7755 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7756 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7757 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7758 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7759 PetscCall(PetscFree2(d_nnz, o_nnz)); 7760 // 7761 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7762 nnz0 = nnz1 = 0; 7763 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7764 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7765 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7766 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7767 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7768 if (PetscRealPart(sv) > vfilter) { 7769 nnz1++; 7770 PetscInt cid = idx[jj] + Istart; //diag 7771 if (c != a) cid = garray[idx[jj]]; 7772 AA[ncol_row] = vals[jj]; 7773 AJ[ncol_row] = cid; 7774 ncol_row++; 7775 } 7776 } 7777 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7778 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7779 } 7780 } 7781 PetscCall(PetscFree2(AA, AJ)); 7782 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 
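  /* All values were inserted into locally owned rows and MAT_NO_OFF_PROC_ENTRIES was set above, so this assembly
     should not need to exchange any stashed off-process entries. */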
7783   PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
7784   PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */
7785
7786   PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));
7787
7788   *filteredG = tGmat;
7789   PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
7790   PetscFunctionReturn(PETSC_SUCCESS);
7791 }
7792
7793 /*
7794   MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix
7795
7796   Input Parameters:
7797 + Amat       - matrix
7798 . symmetrize - make the result symmetric
7799 . scale      - scale with diagonal
     - filter     - threshold used to drop small graph entries (negative means no filtering)
7800
7801   Output Parameter:
7802 . a_Gmat - output scalar graph with entries >= 0
7803
7804 */
7805 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat)
7806 {
7807   PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
7808   MPI_Comm  comm;
7809   Mat       Gmat;
7810   PetscBool ismpiaij, isseqaij;
7811   Mat       a, b, c;
7812   MatType   jtype;
7813
7814   PetscFunctionBegin;
7815   PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
7816   PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
7817   PetscCall(MatGetSize(Amat, &MM, &NN));
7818   PetscCall(MatGetBlockSize(Amat, &bs));
7819   nloc = (Iend - Istart) / bs;
7820
7821   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
7822   PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
7823   PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");
7824
7825   /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
7826   /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
7827      implementation */
7828   if (bs > 1) {
7829     PetscCall(MatGetType(Amat, &jtype));
7830     PetscCall(MatCreate(comm, &Gmat));
7831     PetscCall(MatSetType(Gmat, jtype));
7832     PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
7833     PetscCall(MatSetBlockSizes(Gmat, 1, 1));
7834     if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
7835       PetscInt  *d_nnz, *o_nnz;
7836       MatScalar *aa, val, *AA;
7837       PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
7838       if (isseqaij) {
7839         a = Amat;
7840         b = NULL;
7841       } else {
7842         Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
7843         a = d->A;
7844         b = d->B;
7845       }
7846       PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
7847       PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
7848       for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
7849         PetscInt *nnz = (c == a) ?
d_nnz : o_nnz; 7850 const PetscInt *cols; 7851 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7852 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7853 nnz[brow / bs] = jj / bs; 7854 if (jj % bs) ok = 0; 7855 if (cols) j0 = cols[0]; 7856 else j0 = -1; 7857 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7858 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7859 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7860 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7861 if (jj % bs) ok = 0; 7862 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7863 if (nnz[brow / bs] != jj / bs) ok = 0; 7864 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7865 } 7866 if (!ok) { 7867 PetscCall(PetscFree2(d_nnz, o_nnz)); 7868 goto old_bs; 7869 } 7870 } 7871 } 7872 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7873 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7874 PetscCall(PetscFree2(d_nnz, o_nnz)); 7875 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7876 // diag 7877 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7878 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7879 ai = aseq->i; 7880 n = ai[brow + 1] - ai[brow]; 7881 aj = aseq->j + ai[brow]; 7882 for (int k = 0; k < n; k += bs) { // block columns 7883 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7884 val = 0; 7885 for (int ii = 0; ii < bs; ii++) { // rows in block 7886 aa = aseq->a + ai[brow + ii] + k; 7887 for (int jj = 0; jj < bs; jj++) { // columns in block 7888 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7889 } 7890 } 7891 AA[k / bs] = val; 7892 } 7893 grow = Istart / bs + brow / bs; 7894 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7895 } 7896 // off-diag 7897 if (ismpiaij) { 7898 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7899 const PetscScalar *vals; 7900 const PetscInt *cols, *garray = aij->garray; 7901 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7902 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7903 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7904 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7905 AA[k / bs] = 0; 7906 AJ[cidx] = garray[cols[k]] / bs; 7907 } 7908 nc = ncols / bs; 7909 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7910 for (int ii = 0; ii < bs; ii++) { // rows in block 7911 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7912 for (int k = 0; k < ncols; k += bs) { 7913 for (int jj = 0; jj < bs; jj++) { // cols in block 7914 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7915 } 7916 } 7917 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7918 } 7919 grow = Istart / bs + brow / bs; 7920 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7921 } 7922 } 7923 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7924 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7925 PetscCall(PetscFree2(AA, AJ)); 7926 } else { 7927 const PetscScalar *vals; 7928 const PetscInt *idx; 7929 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7930 old_bs: 7931 /* 7932 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7933 */ 7934 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7935 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7936 if (isseqaij) { 7937 PetscInt max_d_nnz; 7938 /* 7939 Determine exact preallocation count for (sequential) scalar matrix 7940 */ 7941 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7942 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7943 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7944 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7945 PetscCall(PetscFree3(w0, w1, w2)); 7946 } else if (ismpiaij) { 7947 Mat Daij, Oaij; 7948 const PetscInt *garray; 7949 PetscInt max_d_nnz; 7950 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7951 /* 7952 Determine exact preallocation count for diagonal block portion of scalar matrix 7953 */ 7954 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7955 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7956 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7957 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7958 PetscCall(PetscFree3(w0, w1, w2)); 7959 /* 7960 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7961 */ 7962 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7963 o_nnz[jj] = 0; 7964 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7965 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7966 o_nnz[jj] += ncols; 7967 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7968 } 7969 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7970 } 7971 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7972 /* get scalar copy (norms) of matrix */ 7973 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7974 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7975 PetscCall(PetscFree2(d_nnz, o_nnz)); 7976 for (Ii = Istart; Ii < Iend; Ii++) { 7977 PetscInt dest_row = Ii / bs; 7978 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7979 for (jj = 0; jj < ncols; jj++) { 7980 PetscInt dest_col = idx[jj] / bs; 7981 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7982 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7983 } 7984 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7985 } 7986 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7987 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7988 } 7989 } else { 7990 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7991 else { 7992 Gmat = Amat; 7993 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7994 } 7995 if (isseqaij) { 7996 a = Gmat; 7997 b = NULL; 7998 } else { 7999 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8000 a = d->A; 8001 b = d->B; 8002 } 8003 if (filter >= 0 || scale) { 8004 /* take absolute value of each entry */ 8005 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8006 MatInfo info; 8007 PetscScalar *avals; 8008 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8009 PetscCall(MatSeqAIJGetArray(c, &avals)); 8010 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8011 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8012 } 8013 } 8014 } 8015 if (symmetrize) { 8016 PetscBool isset, issym; 8017 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8018 if (!isset || !issym) { 8019 Mat matTrans; 8020 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8021 PetscCall(MatAXPY(Gmat, 1.0, matTrans, 
Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8022 PetscCall(MatDestroy(&matTrans)); 8023 } 8024 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8025 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8026 if (scale) { 8027 /* scale c for all diagonal values = 1 or -1 */ 8028 Vec diag; 8029 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8030 PetscCall(MatGetDiagonal(Gmat, diag)); 8031 PetscCall(VecReciprocal(diag)); 8032 PetscCall(VecSqrtAbs(diag)); 8033 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8034 PetscCall(VecDestroy(&diag)); 8035 } 8036 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8037 8038 if (filter >= 0) { 8039 Mat Fmat = NULL; /* some silly compiler needs this */ 8040 8041 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8042 PetscCall(MatDestroy(&Gmat)); 8043 Gmat = Fmat; 8044 } 8045 *a_Gmat = Gmat; 8046 PetscFunctionReturn(PETSC_SUCCESS); 8047 } 8048 8049 /* 8050 Special version for direct calls from Fortran 8051 */ 8052 #include <petsc/private/fortranimpl.h> 8053 8054 /* Change these macros so can be used in void function */ 8055 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8056 #undef PetscCall 8057 #define PetscCall(...) \ 8058 do { \ 8059 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8060 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8061 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8062 return; \ 8063 } \ 8064 } while (0) 8065 8066 #undef SETERRQ 8067 #define SETERRQ(comm, ierr, ...) \ 8068 do { \ 8069 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8070 return; \ 8071 } while (0) 8072 8073 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8074 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8075 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8076 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8077 #else 8078 #endif 8079 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8080 { 8081 Mat mat = *mmat; 8082 PetscInt m = *mm, n = *mn; 8083 InsertMode addv = *maddv; 8084 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8085 PetscScalar value; 8086 8087 MatCheckPreallocated(mat, 1); 8088 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8089 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8090 { 8091 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8092 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8093 PetscBool roworiented = aij->roworiented; 8094 8095 /* Some Variables required in the macro */ 8096 Mat A = aij->A; 8097 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8098 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8099 MatScalar *aa; 8100 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8101 Mat B = aij->B; 8102 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8103 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8104 MatScalar *ba; 8105 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8106 * cannot use "#if defined" inside a macro. 
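    (The MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros used below rely on the local
    variables declared in this block, including 'inserted', being in scope under exactly these names.)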
*/ 8107 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8108 8109 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8110 PetscInt nonew = a->nonew; 8111 MatScalar *ap1, *ap2; 8112 8113 PetscFunctionBegin; 8114 PetscCall(MatSeqAIJGetArray(A, &aa)); 8115 PetscCall(MatSeqAIJGetArray(B, &ba)); 8116 for (i = 0; i < m; i++) { 8117 if (im[i] < 0) continue; 8118 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8119 if (im[i] >= rstart && im[i] < rend) { 8120 row = im[i] - rstart; 8121 lastcol1 = -1; 8122 rp1 = aj + ai[row]; 8123 ap1 = aa + ai[row]; 8124 rmax1 = aimax[row]; 8125 nrow1 = ailen[row]; 8126 low1 = 0; 8127 high1 = nrow1; 8128 lastcol2 = -1; 8129 rp2 = bj + bi[row]; 8130 ap2 = ba + bi[row]; 8131 rmax2 = bimax[row]; 8132 nrow2 = bilen[row]; 8133 low2 = 0; 8134 high2 = nrow2; 8135 8136 for (j = 0; j < n; j++) { 8137 if (roworiented) value = v[i * n + j]; 8138 else value = v[i + j * m]; 8139 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8140 if (in[j] >= cstart && in[j] < cend) { 8141 col = in[j] - cstart; 8142 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8143 } else if (in[j] < 0) continue; 8144 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8145 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8146 } else { 8147 if (mat->was_assembled) { 8148 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8149 #if defined(PETSC_USE_CTABLE) 8150 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8151 col--; 8152 #else 8153 col = aij->colmap[in[j]] - 1; 8154 #endif 8155 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8156 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8157 col = in[j]; 8158 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8159 B = aij->B; 8160 b = (Mat_SeqAIJ *)B->data; 8161 bimax = b->imax; 8162 bi = b->i; 8163 bilen = b->ilen; 8164 bj = b->j; 8165 rp2 = bj + bi[row]; 8166 ap2 = ba + bi[row]; 8167 rmax2 = bimax[row]; 8168 nrow2 = bilen[row]; 8169 low2 = 0; 8170 high2 = nrow2; 8171 bm = aij->B->rmap->n; 8172 ba = b->a; 8173 inserted = PETSC_FALSE; 8174 } 8175 } else col = in[j]; 8176 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8177 } 8178 } 8179 } else if (!aij->donotstash) { 8180 if (roworiented) { 8181 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8182 } else { 8183 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8184 } 8185 } 8186 } 8187 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8188 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8189 } 8190 PetscFunctionReturnVoid(); 8191 } 8192 8193 /* Undefining these here since they were redefined from their original definition above! No 8194 * other PETSc functions should be defined past this point, as it is impossible to recover the 8195 * original definitions */ 8196 #undef PetscCall 8197 #undef SETERRQ 8198
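/*
   A minimal usage sketch (illustration only; it is not part of this file, and it assumes A and P are assembled
   matrices of a device subtype such as MATMPIAIJCUSPARSE, for which the backend product routines above may be
   selected):

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));      // product->A = A, product->B = P
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP)); // or MATPRODUCT_AB / MATPRODUCT_AtB
     PetscCall(MatProductSetFromOptions(C));           // may route to MatProductSymbolic_MPIAIJBACKEND()
     PetscCall(MatProductSymbolic(C));
     PetscCall(MatProductNumeric(C));                  // can be called again after changing the values of A or P
     PetscCall(MatDestroy(&C));

   Options such as -mat_product_algorithm_backend_cpu 1 (or -matptap_backend_cpu 1 when coming through MatPtAP())
   fall back to the ordinary MPIAIJ product implementation instead.
*/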