1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 #if defined(PETSC_USE_LOG) 15 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 16 #endif 17 PetscCall(MatStashDestroy_Private(&mat->stash)); 18 PetscCall(VecDestroy(&aij->diag)); 19 PetscCall(MatDestroy(&aij->A)); 20 PetscCall(MatDestroy(&aij->B)); 21 #if defined(PETSC_USE_CTABLE) 22 PetscCall(PetscHMapIDestroy(&aij->colmap)); 23 #else 24 PetscCall(PetscFree(aij->colmap)); 25 #endif 26 PetscCall(PetscFree(aij->garray)); 27 PetscCall(VecDestroy(&aij->lvec)); 28 PetscCall(VecScatterDestroy(&aij->Mvctx)); 29 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 30 PetscCall(PetscFree(aij->ld)); 31 32 /* Free COO */ 33 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 34 35 PetscCall(PetscFree(mat->data)); 36 37 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 38 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 39 40 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 45 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 47 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 48 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 50 #if defined(PETSC_HAVE_CUDA) 51 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 52 #endif 53 #if defined(PETSC_HAVE_HIP) 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 55 #endif 56 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 57 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 58 #endif 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 60 #if defined(PETSC_HAVE_ELEMENTAL) 61 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 62 #endif 63 #if defined(PETSC_HAVE_SCALAPACK) 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 65 #endif 66 #if defined(PETSC_HAVE_HYPRE) 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 69 #endif 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 76 
#if defined(PETSC_HAVE_MKL_SPARSE) 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 78 #endif 79 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 80 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 82 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 84 PetscFunctionReturn(PETSC_SUCCESS); 85 } 86 87 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 88 #define TYPE AIJ 89 #define TYPE_AIJ 90 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 91 #undef TYPE 92 #undef TYPE_AIJ 93 94 PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 95 { 96 Mat B; 97 98 PetscFunctionBegin; 99 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 100 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 101 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 102 PetscCall(MatDestroy(&B)); 103 PetscFunctionReturn(PETSC_SUCCESS); 104 } 105 106 PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 107 { 108 Mat B; 109 110 PetscFunctionBegin; 111 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 112 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 113 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 /*MC 118 MATAIJ - MATAIJ 
= "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

  Developer Note:
  Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and this type also automatically switches over to use inodes when
  enough exist.

.seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 149 150 Level: beginner 151 152 .seealso: [](chapter_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 153 M*/ 154 155 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 156 { 157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 158 159 PetscFunctionBegin; 160 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 161 A->boundtocpu = flg; 162 #endif 163 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 164 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 165 166 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 167 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 168 * to differ from the parent matrix. */ 169 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 170 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 171 172 PetscFunctionReturn(PETSC_SUCCESS); 173 } 174 175 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 176 { 177 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 178 179 PetscFunctionBegin; 180 if (mat->A) { 181 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 182 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 183 } 184 PetscFunctionReturn(PETSC_SUCCESS); 185 } 186 187 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 188 { 189 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 190 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 191 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 192 const PetscInt *ia, *ib; 193 const MatScalar *aa, *bb, *aav, *bav; 194 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 195 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 196 197 PetscFunctionBegin; 198 *keptrows = NULL; 199 200 ia = a->i; 201 ib = b->i; 202 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 203 
PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 204 for (i = 0; i < m; i++) { 205 na = ia[i + 1] - ia[i]; 206 nb = ib[i + 1] - ib[i]; 207 if (!na && !nb) { 208 cnt++; 209 goto ok1; 210 } 211 aa = aav + ia[i]; 212 for (j = 0; j < na; j++) { 213 if (aa[j] != 0.0) goto ok1; 214 } 215 bb = bav + ib[i]; 216 for (j = 0; j < nb; j++) { 217 if (bb[j] != 0.0) goto ok1; 218 } 219 cnt++; 220 ok1:; 221 } 222 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 223 if (!n0rows) { 224 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 225 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 226 PetscFunctionReturn(PETSC_SUCCESS); 227 } 228 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 229 cnt = 0; 230 for (i = 0; i < m; i++) { 231 na = ia[i + 1] - ia[i]; 232 nb = ib[i + 1] - ib[i]; 233 if (!na && !nb) continue; 234 aa = aav + ia[i]; 235 for (j = 0; j < na; j++) { 236 if (aa[j] != 0.0) { 237 rows[cnt++] = rstart + i; 238 goto ok2; 239 } 240 } 241 bb = bav + ib[i]; 242 for (j = 0; j < nb; j++) { 243 if (bb[j] != 0.0) { 244 rows[cnt++] = rstart + i; 245 goto ok2; 246 } 247 } 248 ok2:; 249 } 250 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 251 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 252 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 253 PetscFunctionReturn(PETSC_SUCCESS); 254 } 255 256 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 257 { 258 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 259 PetscBool cong; 260 261 PetscFunctionBegin; 262 PetscCall(MatHasCongruentLayouts(Y, &cong)); 263 if (Y->assembled && cong) { 264 PetscCall(MatDiagonalSet(aij->A, D, is)); 265 } else { 266 PetscCall(MatDiagonalSet_Default(Y, D, is)); 267 } 268 PetscFunctionReturn(PETSC_SUCCESS); 269 } 270 271 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 272 { 273 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 274 PetscInt i, rstart, nrows, *rows; 275 276 
PetscFunctionBegin; 277 *zrows = NULL; 278 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 279 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 280 for (i = 0; i < nrows; i++) rows[i] += rstart; 281 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 286 { 287 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 288 PetscInt i, m, n, *garray = aij->garray; 289 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 290 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 291 PetscReal *work; 292 const PetscScalar *dummy; 293 294 PetscFunctionBegin; 295 PetscCall(MatGetSize(A, &m, &n)); 296 PetscCall(PetscCalloc1(n, &work)); 297 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 298 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 299 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 300 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 301 if (type == NORM_2) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 304 } else if (type == NORM_1) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 307 } else if (type == NORM_INFINITY) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 310 } else if (type == REDUCTION_SUM_REALPART || 
type == REDUCTION_MEAN_REALPART) { 311 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 312 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 313 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 314 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 315 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 316 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 317 if (type == NORM_INFINITY) { 318 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 319 } else { 320 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 321 } 322 PetscCall(PetscFree(work)); 323 if (type == NORM_2) { 324 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 325 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 326 for (i = 0; i < n; i++) reductions[i] /= m; 327 } 328 PetscFunctionReturn(PETSC_SUCCESS); 329 } 330 331 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 332 { 333 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 334 IS sis, gis; 335 const PetscInt *isis, *igis; 336 PetscInt n, *iis, nsis, ngis, rstart, i; 337 338 PetscFunctionBegin; 339 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 340 PetscCall(MatFindNonzeroRows(a->B, &gis)); 341 PetscCall(ISGetSize(gis, &ngis)); 342 PetscCall(ISGetSize(sis, &nsis)); 343 PetscCall(ISGetIndices(sis, &isis)); 344 PetscCall(ISGetIndices(gis, &igis)); 345 346 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 347 PetscCall(PetscArraycpy(iis, igis, ngis)); 348 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 349 n = ngis + nsis; 350 
PetscCall(PetscSortRemoveDupsInt(&n, iis)); 351 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 352 for (i = 0; i < n; i++) iis[i] += rstart; 353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 354 355 PetscCall(ISRestoreIndices(sis, &isis)); 356 PetscCall(ISRestoreIndices(gis, &igis)); 357 PetscCall(ISDestroy(&sis)); 358 PetscCall(ISDestroy(&gis)); 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /* 363 Local utility routine that creates a mapping from the global column 364 number to the local number in the off-diagonal part of the local 365 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 366 a slightly higher hash table cost; without it it is not scalable (each processor 367 has an order N integer array but is fast to access. 368 */ 369 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 370 { 371 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 372 PetscInt n = aij->B->cmap->n, i; 373 374 PetscFunctionBegin; 375 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 376 #if defined(PETSC_USE_CTABLE) 377 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 378 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 379 #else 380 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 381 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 382 #endif 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 387 { \ 388 if (col <= lastcol1) low1 = 0; \ 389 else high1 = nrow1; \ 390 lastcol1 = col; \ 391 while (high1 - low1 > 5) { \ 392 t = (low1 + high1) / 2; \ 393 if (rp1[t] > col) high1 = t; \ 394 else low1 = t; \ 395 } \ 396 for (_i = low1; _i < high1; _i++) { \ 397 if (rp1[_i] > col) break; \ 398 if (rp1[_i] == col) { \ 399 if (addv == ADD_VALUES) { \ 400 ap1[_i] += value; \ 401 /* Not sure LogFlops will slow dow 
the code or not */ \ 402 (void)PetscLogFlops(1.0); \ 403 } else ap1[_i] = value; \ 404 goto a_noinsert; \ 405 } \ 406 } \ 407 if (value == 0.0 && ignorezeroentries && row != col) { \ 408 low1 = 0; \ 409 high1 = nrow1; \ 410 goto a_noinsert; \ 411 } \ 412 if (nonew == 1) { \ 413 low1 = 0; \ 414 high1 = nrow1; \ 415 goto a_noinsert; \ 416 } \ 417 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 418 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 419 N = nrow1++ - 1; \ 420 a->nz++; \ 421 high1++; \ 422 /* shift up all the later entries in this row */ \ 423 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 424 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 425 rp1[_i] = col; \ 426 ap1[_i] = value; \ 427 A->nonzerostate++; \ 428 a_noinsert:; \ 429 ailen[row] = nrow1; \ 430 } 431 432 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 433 { \ 434 if (col <= lastcol2) low2 = 0; \ 435 else high2 = nrow2; \ 436 lastcol2 = col; \ 437 while (high2 - low2 > 5) { \ 438 t = (low2 + high2) / 2; \ 439 if (rp2[t] > col) high2 = t; \ 440 else low2 = t; \ 441 } \ 442 for (_i = low2; _i < high2; _i++) { \ 443 if (rp2[_i] > col) break; \ 444 if (rp2[_i] == col) { \ 445 if (addv == ADD_VALUES) { \ 446 ap2[_i] += value; \ 447 (void)PetscLogFlops(1.0); \ 448 } else ap2[_i] = value; \ 449 goto b_noinsert; \ 450 } \ 451 } \ 452 if (value == 0.0 && ignorezeroentries) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 if (nonew == 1) { \ 458 low2 = 0; \ 459 high2 = nrow2; \ 460 goto b_noinsert; \ 461 } \ 462 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 463 MatSeqXAIJReallocateAIJ(B, bm, 1, 
nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 464 N = nrow2++ - 1; \ 465 b->nz++; \ 466 high2++; \ 467 /* shift up all the later entries in this row */ \ 468 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 469 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 470 rp2[_i] = col; \ 471 ap2[_i] = value; \ 472 B->nonzerostate++; \ 473 b_noinsert:; \ 474 bilen[row] = nrow2; \ 475 } 476 477 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 478 { 479 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 480 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 481 PetscInt l, *garray = mat->garray, diag; 482 PetscScalar *aa, *ba; 483 484 PetscFunctionBegin; 485 /* code only works for square matrices A */ 486 487 /* find size of row to the left of the diagonal part */ 488 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 489 row = row - diag; 490 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 491 if (garray[b->j[b->i[row] + l]] > diag) break; 492 } 493 if (l) { 494 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 495 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 496 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 497 } 498 499 /* diagonal part */ 500 if (a->i[row + 1] - a->i[row]) { 501 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 502 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 503 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 504 } 505 506 /* right of diagonal part */ 507 if (b->i[row + 1] - b->i[row] - l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 PetscFunctionReturn(PETSC_SUCCESS); 513 } 514 515 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 516 { 517 
Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 518 PetscScalar value = 0.0; 519 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 520 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 521 PetscBool roworiented = aij->roworiented; 522 523 /* Some Variables required in the macro */ 524 Mat A = aij->A; 525 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 526 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 527 PetscBool ignorezeroentries = a->ignorezeroentries; 528 Mat B = aij->B; 529 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 530 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 531 MatScalar *aa, *ba; 532 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 533 PetscInt nonew; 534 MatScalar *ap1, *ap2; 535 536 PetscFunctionBegin; 537 PetscCall(MatSeqAIJGetArray(A, &aa)); 538 PetscCall(MatSeqAIJGetArray(B, &ba)); 539 for (i = 0; i < m; i++) { 540 if (im[i] < 0) continue; 541 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 542 if (im[i] >= rstart && im[i] < rend) { 543 row = im[i] - rstart; 544 lastcol1 = -1; 545 rp1 = aj + ai[row]; 546 ap1 = aa + ai[row]; 547 rmax1 = aimax[row]; 548 nrow1 = ailen[row]; 549 low1 = 0; 550 high1 = nrow1; 551 lastcol2 = -1; 552 rp2 = bj + bi[row]; 553 ap2 = ba + bi[row]; 554 rmax2 = bimax[row]; 555 nrow2 = bilen[row]; 556 low2 = 0; 557 high2 = nrow2; 558 559 for (j = 0; j < n; j++) { 560 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 561 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 562 if (in[j] >= cstart && in[j] < cend) { 563 col = in[j] - cstart; 564 nonew = a->nonew; 565 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 566 } else if (in[j] < 0) { 567 continue; 568 } else { 569 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 570 if (mat->was_assembled) { 571 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 572 #if defined(PETSC_USE_CTABLE) 573 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 574 col--; 575 #else 576 col = aij->colmap[in[j]] - 1; 577 #endif 578 if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */ 579 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 580 col = in[j]; 581 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 582 B = aij->B; 583 b = (Mat_SeqAIJ *)B->data; 584 bimax = b->imax; 585 bi = b->i; 586 bilen = b->ilen; 587 bj = b->j; 588 ba = b->a; 589 rp2 = bj + bi[row]; 590 ap2 = ba + bi[row]; 591 rmax2 = bimax[row]; 592 nrow2 = bilen[row]; 593 low2 = 0; 594 high2 = nrow2; 595 bm = aij->B->rmap->n; 596 ba = b->a; 597 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 598 if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) { 599 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 600 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 601 } 602 } else col = in[j]; 603 nonew = b->nonew; 604 
MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 605 } 606 } 607 } else { 608 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 609 if (!aij->donotstash) { 610 mat->assembled = PETSC_FALSE; 611 if (roworiented) { 612 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 613 } else { 614 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 615 } 616 } 617 } 618 } 619 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 620 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 621 PetscFunctionReturn(PETSC_SUCCESS); 622 } 623 624 /* 625 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 626 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 627 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
628 */ 629 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 630 { 631 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 632 Mat A = aij->A; /* diagonal part of the matrix */ 633 Mat B = aij->B; /* offdiagonal part of the matrix */ 634 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 635 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 636 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 637 PetscInt *ailen = a->ilen, *aj = a->j; 638 PetscInt *bilen = b->ilen, *bj = b->j; 639 PetscInt am = aij->A->rmap->n, j; 640 PetscInt diag_so_far = 0, dnz; 641 PetscInt offd_so_far = 0, onz; 642 643 PetscFunctionBegin; 644 /* Iterate over all rows of the matrix */ 645 for (j = 0; j < am; j++) { 646 dnz = onz = 0; 647 /* Iterate over all non-zero columns of the current row */ 648 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 649 /* If column is in the diagonal */ 650 if (mat_j[col] >= cstart && mat_j[col] < cend) { 651 aj[diag_so_far++] = mat_j[col] - cstart; 652 dnz++; 653 } else { /* off-diagonal entries */ 654 bj[offd_so_far++] = mat_j[col]; 655 onz++; 656 } 657 } 658 ailen[j] = dnz; 659 bilen[j] = onz; 660 } 661 PetscFunctionReturn(PETSC_SUCCESS); 662 } 663 664 /* 665 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 666 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 667 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 668 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 669 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
670 */ 671 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 672 { 673 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 674 Mat A = aij->A; /* diagonal part of the matrix */ 675 Mat B = aij->B; /* offdiagonal part of the matrix */ 676 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data; 677 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 678 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 679 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 680 PetscInt *ailen = a->ilen, *aj = a->j; 681 PetscInt *bilen = b->ilen, *bj = b->j; 682 PetscInt am = aij->A->rmap->n, j; 683 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 684 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 685 PetscScalar *aa = a->a, *ba = b->a; 686 687 PetscFunctionBegin; 688 /* Iterate over all rows of the matrix */ 689 for (j = 0; j < am; j++) { 690 dnz_row = onz_row = 0; 691 rowstart_offd = full_offd_i[j]; 692 rowstart_diag = full_diag_i[j]; 693 /* Iterate over all non-zero columns of the current row */ 694 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 695 /* If column is in the diagonal */ 696 if (mat_j[col] >= cstart && mat_j[col] < cend) { 697 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 698 aa[rowstart_diag + dnz_row] = mat_a[col]; 699 dnz_row++; 700 } else { /* off-diagonal entries */ 701 bj[rowstart_offd + onz_row] = mat_j[col]; 702 ba[rowstart_offd + onz_row] = mat_a[col]; 703 onz_row++; 704 } 705 } 706 ailen[j] = dnz_row; 707 bilen[j] = onz_row; 708 } 709 PetscFunctionReturn(PETSC_SUCCESS); 710 } 711 712 PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 713 { 714 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 715 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 716 PetscInt 
cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 717 718 PetscFunctionBegin; 719 for (i = 0; i < m; i++) { 720 if (idxm[i] < 0) continue; /* negative row */ 721 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 722 if (idxm[i] >= rstart && idxm[i] < rend) { 723 row = idxm[i] - rstart; 724 for (j = 0; j < n; j++) { 725 if (idxn[j] < 0) continue; /* negative column */ 726 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 727 if (idxn[j] >= cstart && idxn[j] < cend) { 728 col = idxn[j] - cstart; 729 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 730 } else { 731 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 732 #if defined(PETSC_USE_CTABLE) 733 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 734 col--; 735 #else 736 col = aij->colmap[idxn[j]] - 1; 737 #endif 738 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 739 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 740 } 741 } 742 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 743 } 744 PetscFunctionReturn(PETSC_SUCCESS); 745 } 746 747 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 748 { 749 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 750 PetscInt nstash, reallocs; 751 752 PetscFunctionBegin; 753 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 754 755 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 756 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 757 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 758 PetscFunctionReturn(PETSC_SUCCESS); 759 } 760 761 
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 762 { 763 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 764 PetscMPIInt n; 765 PetscInt i, j, rstart, ncols, flg; 766 PetscInt *row, *col; 767 PetscBool other_disassembled; 768 PetscScalar *val; 769 770 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 771 772 PetscFunctionBegin; 773 if (!aij->donotstash && !mat->nooffprocentries) { 774 while (1) { 775 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 776 if (!flg) break; 777 778 for (i = 0; i < n;) { 779 /* Now identify the consecutive vals belonging to the same row */ 780 for (j = i, rstart = row[j]; j < n; j++) { 781 if (row[j] != rstart) break; 782 } 783 if (j < n) ncols = j - i; 784 else ncols = n - i; 785 /* Now assemble all these values with a single function call */ 786 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 787 i = j; 788 } 789 } 790 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 791 } 792 #if defined(PETSC_HAVE_DEVICE) 793 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 794 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 795 if (mat->boundtocpu) { 796 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 797 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 798 } 799 #endif 800 PetscCall(MatAssemblyBegin(aij->A, mode)); 801 PetscCall(MatAssemblyEnd(aij->A, mode)); 802 803 /* determine if any processor has disassembled, if so we must 804 also disassemble ourself, in order that we may reassemble. 
*/ 805 /* 806 if nonzero structure of submatrix B cannot change then we know that 807 no processor disassembled thus we can skip this stuff 808 */ 809 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 810 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 811 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 812 PetscCall(MatDisAssemble_MPIAIJ(mat)); 813 } 814 } 815 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 816 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 817 #if defined(PETSC_HAVE_DEVICE) 818 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 819 #endif 820 PetscCall(MatAssemblyBegin(aij->B, mode)); 821 PetscCall(MatAssemblyEnd(aij->B, mode)); 822 823 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 824 825 aij->rowvalues = NULL; 826 827 PetscCall(VecDestroy(&aij->diag)); 828 829 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 830 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 831 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 832 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 833 } 834 #if defined(PETSC_HAVE_DEVICE) 835 mat->offloadmask = PETSC_OFFLOAD_BOTH; 836 #endif 837 PetscFunctionReturn(PETSC_SUCCESS); 838 } 839 840 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 841 { 842 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 843 844 PetscFunctionBegin; 845 PetscCall(MatZeroEntries(l->A)); 846 PetscCall(MatZeroEntries(l->B)); 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], 
PetscScalar diag, Vec x, Vec b) 851 { 852 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 853 PetscObjectState sA, sB; 854 PetscInt *lrows; 855 PetscInt r, len; 856 PetscBool cong, lch, gch; 857 858 PetscFunctionBegin; 859 /* get locally owned rows */ 860 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 861 PetscCall(MatHasCongruentLayouts(A, &cong)); 862 /* fix right hand side if needed */ 863 if (x && b) { 864 const PetscScalar *xx; 865 PetscScalar *bb; 866 867 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 868 PetscCall(VecGetArrayRead(x, &xx)); 869 PetscCall(VecGetArray(b, &bb)); 870 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 871 PetscCall(VecRestoreArrayRead(x, &xx)); 872 PetscCall(VecRestoreArray(b, &bb)); 873 } 874 875 sA = mat->A->nonzerostate; 876 sB = mat->B->nonzerostate; 877 878 if (diag != 0.0 && cong) { 879 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 880 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 881 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 882 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 883 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 884 PetscInt nnwA, nnwB; 885 PetscBool nnzA, nnzB; 886 887 nnwA = aijA->nonew; 888 nnwB = aijB->nonew; 889 nnzA = aijA->keepnonzeropattern; 890 nnzB = aijB->keepnonzeropattern; 891 if (!nnzA) { 892 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 893 aijA->nonew = 0; 894 } 895 if (!nnzB) { 896 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 897 aijB->nonew = 0; 898 } 899 /* Must zero here before the next loop */ 900 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 901 PetscCall(MatZeroRows(mat->B, len, lrows, 
0.0, NULL, NULL)); 902 for (r = 0; r < len; ++r) { 903 const PetscInt row = lrows[r] + A->rmap->rstart; 904 if (row >= A->cmap->N) continue; 905 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 906 } 907 aijA->nonew = nnwA; 908 aijB->nonew = nnwB; 909 } else { 910 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 911 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 912 } 913 PetscCall(PetscFree(lrows)); 914 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 915 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 916 917 /* reduce nonzerostate */ 918 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 919 PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 920 if (gch) A->nonzerostate++; 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 925 { 926 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 927 PetscMPIInt n = A->rmap->n; 928 PetscInt i, j, r, m, len = 0; 929 PetscInt *lrows, *owners = A->rmap->range; 930 PetscMPIInt p = 0; 931 PetscSFNode *rrows; 932 PetscSF sf; 933 const PetscScalar *xx; 934 PetscScalar *bb, *mask, *aij_a; 935 Vec xmask, lmask; 936 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 937 const PetscInt *aj, *ii, *ridx; 938 PetscScalar *aa; 939 940 PetscFunctionBegin; 941 /* Create SF where leaves are input rows and roots are owned rows */ 942 PetscCall(PetscMalloc1(n, &lrows)); 943 for (r = 0; r < n; ++r) lrows[r] = -1; 944 PetscCall(PetscMalloc1(N, &rrows)); 945 for (r = 0; r < N; ++r) { 946 const PetscInt idx = rows[r]; 947 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 948 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 949 
PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 950 } 951 rrows[r].rank = p; 952 rrows[r].index = rows[r] - owners[p]; 953 } 954 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 955 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 956 /* Collect flags for rows to be zeroed */ 957 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 958 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 959 PetscCall(PetscSFDestroy(&sf)); 960 /* Compress and put in row numbers */ 961 for (r = 0; r < n; ++r) 962 if (lrows[r] >= 0) lrows[len++] = r; 963 /* zero diagonal part of matrix */ 964 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 965 /* handle off diagonal part of matrix */ 966 PetscCall(MatCreateVecs(A, &xmask, NULL)); 967 PetscCall(VecDuplicate(l->lvec, &lmask)); 968 PetscCall(VecGetArray(xmask, &bb)); 969 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 970 PetscCall(VecRestoreArray(xmask, &bb)); 971 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 972 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 973 PetscCall(VecDestroy(&xmask)); 974 if (x && b) { /* this code is buggy when the row and column layout don't match */ 975 PetscBool cong; 976 977 PetscCall(MatHasCongruentLayouts(A, &cong)); 978 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 979 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 980 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecGetArrayRead(l->lvec, &xx)); 982 PetscCall(VecGetArray(b, &bb)); 983 } 984 PetscCall(VecGetArray(lmask, &mask)); 985 /* remove zeroed rows of off diagonal matrix */ 986 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 987 ii = aij->i; 988 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - 
ii[lrows[i]])); 989 /* loop over all elements of off process part of matrix zeroing removed columns*/ 990 if (aij->compressedrow.use) { 991 m = aij->compressedrow.nrows; 992 ii = aij->compressedrow.i; 993 ridx = aij->compressedrow.rindex; 994 for (i = 0; i < m; i++) { 995 n = ii[i + 1] - ii[i]; 996 aj = aij->j + ii[i]; 997 aa = aij_a + ii[i]; 998 999 for (j = 0; j < n; j++) { 1000 if (PetscAbsScalar(mask[*aj])) { 1001 if (b) bb[*ridx] -= *aa * xx[*aj]; 1002 *aa = 0.0; 1003 } 1004 aa++; 1005 aj++; 1006 } 1007 ridx++; 1008 } 1009 } else { /* do not use compressed row format */ 1010 m = l->B->rmap->n; 1011 for (i = 0; i < m; i++) { 1012 n = ii[i + 1] - ii[i]; 1013 aj = aij->j + ii[i]; 1014 aa = aij_a + ii[i]; 1015 for (j = 0; j < n; j++) { 1016 if (PetscAbsScalar(mask[*aj])) { 1017 if (b) bb[i] -= *aa * xx[*aj]; 1018 *aa = 0.0; 1019 } 1020 aa++; 1021 aj++; 1022 } 1023 } 1024 } 1025 if (x && b) { 1026 PetscCall(VecRestoreArray(b, &bb)); 1027 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1028 } 1029 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1030 PetscCall(VecRestoreArray(lmask, &mask)); 1031 PetscCall(VecDestroy(&lmask)); 1032 PetscCall(PetscFree(lrows)); 1033 1034 /* only change matrix nonzero state if pattern was allowed to be changed */ 1035 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 1036 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1037 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1038 } 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } 1041 1042 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1043 { 1044 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1045 PetscInt nt; 1046 VecScatter Mvctx = a->Mvctx; 1047 1048 PetscFunctionBegin; 1049 PetscCall(VecGetLocalSize(xx, &nt)); 1050 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1051 
PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1052 PetscUseTypeMethod(a->A, mult, xx, yy); 1053 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1054 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1055 PetscFunctionReturn(PETSC_SUCCESS); 1056 } 1057 1058 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1059 { 1060 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1061 1062 PetscFunctionBegin; 1063 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1064 PetscFunctionReturn(PETSC_SUCCESS); 1065 } 1066 1067 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1070 VecScatter Mvctx = a->Mvctx; 1071 1072 PetscFunctionBegin; 1073 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1074 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1075 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1076 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1077 PetscFunctionReturn(PETSC_SUCCESS); 1078 } 1079 1080 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1081 { 1082 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1083 1084 PetscFunctionBegin; 1085 /* do nondiagonal part */ 1086 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1087 /* do local part */ 1088 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1089 /* add partial results together */ 1090 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1091 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1092 PetscFunctionReturn(PETSC_SUCCESS); 1093 } 1094 1095 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1096 { 1097 MPI_Comm comm; 1098 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1099 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1100 IS Me, Notme; 1101 PetscInt M, N, first, 
last, *notme, i; 1102 PetscBool lf; 1103 PetscMPIInt size; 1104 1105 PetscFunctionBegin; 1106 /* Easy test: symmetric diagonal block */ 1107 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1108 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1109 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1110 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1111 PetscCallMPI(MPI_Comm_size(comm, &size)); 1112 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1113 1114 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1115 PetscCall(MatGetSize(Amat, &M, &N)); 1116 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1117 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1118 for (i = 0; i < first; i++) notme[i] = i; 1119 for (i = last; i < M; i++) notme[i - last + first] = i; 1120 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1121 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1122 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1123 Aoff = Aoffs[0]; 1124 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1125 Boff = Boffs[0]; 1126 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1127 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1128 PetscCall(MatDestroyMatrices(1, &Boffs)); 1129 PetscCall(ISDestroy(&Me)); 1130 PetscCall(ISDestroy(&Notme)); 1131 PetscCall(PetscFree(notme)); 1132 PetscFunctionReturn(PETSC_SUCCESS); 1133 } 1134 1135 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) 1136 { 1137 PetscFunctionBegin; 1138 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1139 PetscFunctionReturn(PETSC_SUCCESS); 1140 } 1141 1142 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1145 1146 PetscFunctionBegin; 1147 /* do nondiagonal part */ 1148 PetscCall((*a->B->ops->multtranspose)(a->B, 
xx, a->lvec)); 1149 /* do local part */ 1150 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1151 /* add partial results together */ 1152 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1153 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1154 PetscFunctionReturn(PETSC_SUCCESS); 1155 } 1156 1157 /* 1158 This only works correctly for square matrices where the subblock A->A is the 1159 diagonal block 1160 */ 1161 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1162 { 1163 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1164 1165 PetscFunctionBegin; 1166 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1167 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1168 PetscCall(MatGetDiagonal(a->A, v)); 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1173 { 1174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1175 1176 PetscFunctionBegin; 1177 PetscCall(MatScale(a->A, aa)); 1178 PetscCall(MatScale(a->B, aa)); 1179 PetscFunctionReturn(PETSC_SUCCESS); 1180 } 1181 1182 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1183 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) 1184 { 1185 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1186 1187 PetscFunctionBegin; 1188 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1189 PetscCall(PetscFree(aij->Aperm1)); 1190 PetscCall(PetscFree(aij->Bperm1)); 1191 PetscCall(PetscFree(aij->Ajmap1)); 1192 PetscCall(PetscFree(aij->Bjmap1)); 1193 1194 PetscCall(PetscFree(aij->Aimap2)); 1195 PetscCall(PetscFree(aij->Bimap2)); 1196 PetscCall(PetscFree(aij->Aperm2)); 1197 PetscCall(PetscFree(aij->Bperm2)); 1198 PetscCall(PetscFree(aij->Ajmap2)); 1199 PetscCall(PetscFree(aij->Bjmap2)); 1200 
1201 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1202 PetscCall(PetscFree(aij->Cperm1)); 1203 PetscFunctionReturn(PETSC_SUCCESS); 1204 } 1205 1206 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1207 { 1208 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1209 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1210 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1211 const PetscInt *garray = aij->garray; 1212 const PetscScalar *aa, *ba; 1213 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1214 PetscInt64 nz, hnz; 1215 PetscInt *rowlens; 1216 PetscInt *colidxs; 1217 PetscScalar *matvals; 1218 PetscMPIInt rank; 1219 1220 PetscFunctionBegin; 1221 PetscCall(PetscViewerSetUp(viewer)); 1222 1223 M = mat->rmap->N; 1224 N = mat->cmap->N; 1225 m = mat->rmap->n; 1226 rs = mat->rmap->rstart; 1227 cs = mat->cmap->rstart; 1228 nz = A->nz + B->nz; 1229 1230 /* write matrix header */ 1231 header[0] = MAT_FILE_CLASSID; 1232 header[1] = M; 1233 header[2] = N; 1234 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1235 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1236 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1237 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1238 1239 /* fill in and store row lengths */ 1240 PetscCall(PetscMalloc1(m, &rowlens)); 1241 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1242 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1243 PetscCall(PetscFree(rowlens)); 1244 1245 /* fill in and store column indices */ 1246 PetscCall(PetscMalloc1(nz, &colidxs)); 1247 for (cnt = 0, i = 0; i < m; i++) { 1248 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1249 if (garray[B->j[jb]] > cs) break; 1250 colidxs[cnt++] = garray[B->j[jb]]; 1251 } 1252 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1253 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 
1254 } 1255 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1256 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1257 PetscCall(PetscFree(colidxs)); 1258 1259 /* fill in and store nonzero values */ 1260 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1261 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1262 PetscCall(PetscMalloc1(nz, &matvals)); 1263 for (cnt = 0, i = 0; i < m; i++) { 1264 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1265 if (garray[B->j[jb]] > cs) break; 1266 matvals[cnt++] = ba[jb]; 1267 } 1268 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1269 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1270 } 1271 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1272 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1273 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1274 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1275 PetscCall(PetscFree(matvals)); 1276 1277 /* write block size option to the viewer's .info file */ 1278 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 #include <petscdraw.h> 1283 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1284 { 1285 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1286 PetscMPIInt rank = aij->rank, size = aij->size; 1287 PetscBool isdraw, iascii, isbinary; 1288 PetscViewer sviewer; 1289 PetscViewerFormat format; 1290 1291 PetscFunctionBegin; 1292 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1293 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1294 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1295 if 
(iascii) { 1296 PetscCall(PetscViewerGetFormat(viewer, &format)); 1297 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1298 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1299 PetscCall(PetscMalloc1(size, &nz)); 1300 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1301 for (i = 0; i < (PetscInt)size; i++) { 1302 nmax = PetscMax(nmax, nz[i]); 1303 nmin = PetscMin(nmin, nz[i]); 1304 navg += nz[i]; 1305 } 1306 PetscCall(PetscFree(nz)); 1307 navg = navg / size; 1308 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1309 PetscFunctionReturn(PETSC_SUCCESS); 1310 } 1311 PetscCall(PetscViewerGetFormat(viewer, &format)); 1312 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1313 MatInfo info; 1314 PetscInt *inodes = NULL; 1315 1316 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1317 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1318 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1319 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1320 if (!inodes) { 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1322 (double)info.memory)); 1323 } else { 1324 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1325 (double)info.memory)); 1326 } 1327 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1328 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", 
rank, (PetscInt)info.nz_used)); 1329 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1330 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1331 PetscCall(PetscViewerFlush(viewer)); 1332 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1333 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1334 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1335 PetscFunctionReturn(PETSC_SUCCESS); 1336 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1337 PetscInt inodecount, inodelimit, *inodes; 1338 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1339 if (inodes) { 1340 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1341 } else { 1342 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1343 } 1344 PetscFunctionReturn(PETSC_SUCCESS); 1345 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1346 PetscFunctionReturn(PETSC_SUCCESS); 1347 } 1348 } else if (isbinary) { 1349 if (size == 1) { 1350 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1351 PetscCall(MatView(aij->A, viewer)); 1352 } else { 1353 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1354 } 1355 PetscFunctionReturn(PETSC_SUCCESS); 1356 } else if (iascii && size == 1) { 1357 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1358 PetscCall(MatView(aij->A, viewer)); 1359 PetscFunctionReturn(PETSC_SUCCESS); 1360 } else if (isdraw) { 1361 PetscDraw draw; 1362 PetscBool isnull; 1363 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1364 PetscCall(PetscDrawIsNull(draw, &isnull)); 1365 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 { /* assemble the entire matrix onto first processor */ 1369 Mat A = NULL, Av; 1370 IS 
isrow, iscol; 1371 1372 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1373 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1374 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1375 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1376 /* The commented code uses MatCreateSubMatrices instead */ 1377 /* 1378 Mat *AA, A = NULL, Av; 1379 IS isrow,iscol; 1380 1381 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1382 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1383 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1384 if (rank == 0) { 1385 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1386 A = AA[0]; 1387 Av = AA[0]; 1388 } 1389 PetscCall(MatDestroySubMatrices(1,&AA)); 1390 */ 1391 PetscCall(ISDestroy(&iscol)); 1392 PetscCall(ISDestroy(&isrow)); 1393 /* 1394 Everyone has to call to draw the matrix since the graphics waits are 1395 synchronized across all processors that share the PetscDraw object 1396 */ 1397 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1398 if (rank == 0) { 1399 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1400 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1401 } 1402 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1403 PetscCall(PetscViewerFlush(viewer)); 1404 PetscCall(MatDestroy(&A)); 1405 } 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1410 { 1411 PetscBool iascii, isdraw, issocket, isbinary; 1412 1413 PetscFunctionBegin; 1414 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1415 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, 
&isdraw)); 1416 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1417 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1418 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1419 PetscFunctionReturn(PETSC_SUCCESS); 1420 } 1421 1422 PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1423 { 1424 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1425 Vec bb1 = NULL; 1426 PetscBool hasop; 1427 1428 PetscFunctionBegin; 1429 if (flag == SOR_APPLY_UPPER) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 PetscFunctionReturn(PETSC_SUCCESS); 1432 } 1433 1434 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1435 1436 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1437 if (flag & SOR_ZERO_INITIAL_GUESS) { 1438 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1439 its--; 1440 } 1441 1442 while (its--) { 1443 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1444 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1445 1446 /* update rhs: bb1 = bb - B*x */ 1447 PetscCall(VecScale(mat->lvec, -1.0)); 1448 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1449 1450 /* local sweep */ 1451 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1452 } 1453 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1454 if (flag & SOR_ZERO_INITIAL_GUESS) { 1455 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1456 its--; 1457 } 1458 while (its--) { 1459 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1460 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, 
INSERT_VALUES, SCATTER_FORWARD)); 1461 1462 /* update rhs: bb1 = bb - B*x */ 1463 PetscCall(VecScale(mat->lvec, -1.0)); 1464 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1465 1466 /* local sweep */ 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1468 } 1469 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1470 if (flag & SOR_ZERO_INITIAL_GUESS) { 1471 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1472 its--; 1473 } 1474 while (its--) { 1475 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1476 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1477 1478 /* update rhs: bb1 = bb - B*x */ 1479 PetscCall(VecScale(mat->lvec, -1.0)); 1480 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1481 1482 /* local sweep */ 1483 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1484 } 1485 } else if (flag & SOR_EISENSTAT) { 1486 Vec xx1; 1487 1488 PetscCall(VecDuplicate(bb, &xx1)); 1489 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1490 1491 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1492 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1493 if (!mat->diag) { 1494 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1495 PetscCall(MatGetDiagonal(matin, mat->diag)); 1496 } 1497 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1498 if (hasop) { 1499 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1500 } else { 1501 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1502 } 1503 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1504 1505 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1506 1507 /* local sweep */ 1508 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, 
(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1509 PetscCall(VecAXPY(xx, 1.0, xx1)); 1510 PetscCall(VecDestroy(&xx1)); 1511 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1512 1513 PetscCall(VecDestroy(&bb1)); 1514 1515 matin->factorerrortype = mat->A->factorerrortype; 1516 PetscFunctionReturn(PETSC_SUCCESS); 1517 } 1518 1519 PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1520 { 1521 Mat aA, aB, Aperm; 1522 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1523 PetscScalar *aa, *ba; 1524 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1525 PetscSF rowsf, sf; 1526 IS parcolp = NULL; 1527 PetscBool done; 1528 1529 PetscFunctionBegin; 1530 PetscCall(MatGetLocalSize(A, &m, &n)); 1531 PetscCall(ISGetIndices(rowp, &rwant)); 1532 PetscCall(ISGetIndices(colp, &cwant)); 1533 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1534 1535 /* Invert row permutation to find out where my rows should go */ 1536 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1537 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1538 PetscCall(PetscSFSetFromOptions(rowsf)); 1539 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1540 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1541 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1542 1543 /* Invert column permutation to find out where my columns should go */ 1544 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1545 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1546 PetscCall(PetscSFSetFromOptions(sf)); 1547 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1548 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1549 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, 
work, cdest, MPI_REPLACE)); 1550 PetscCall(PetscSFDestroy(&sf)); 1551 1552 PetscCall(ISRestoreIndices(rowp, &rwant)); 1553 PetscCall(ISRestoreIndices(colp, &cwant)); 1554 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1555 1556 /* Find out where my gcols should go */ 1557 PetscCall(MatGetSize(aB, NULL, &ng)); 1558 PetscCall(PetscMalloc1(ng, &gcdest)); 1559 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1560 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1561 PetscCall(PetscSFSetFromOptions(sf)); 1562 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1563 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1564 PetscCall(PetscSFDestroy(&sf)); 1565 1566 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1567 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1568 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1569 for (i = 0; i < m; i++) { 1570 PetscInt row = rdest[i]; 1571 PetscMPIInt rowner; 1572 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1573 for (j = ai[i]; j < ai[i + 1]; j++) { 1574 PetscInt col = cdest[aj[j]]; 1575 PetscMPIInt cowner; 1576 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1577 if (rowner == cowner) dnnz[i]++; 1578 else onnz[i]++; 1579 } 1580 for (j = bi[i]; j < bi[i + 1]; j++) { 1581 PetscInt col = gcdest[bj[j]]; 1582 PetscMPIInt cowner; 1583 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1584 if (rowner == cowner) dnnz[i]++; 1585 else onnz[i]++; 1586 } 1587 } 1588 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1589 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1590 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1591 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1592 
PetscCall(PetscSFDestroy(&rowsf)); 1593 1594 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1595 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1596 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1597 for (i = 0; i < m; i++) { 1598 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1599 PetscInt j0, rowlen; 1600 rowlen = ai[i + 1] - ai[i]; 1601 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1602 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1603 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1604 } 1605 rowlen = bi[i + 1] - bi[i]; 1606 for (j0 = j = 0; j < rowlen; j0 = j) { 1607 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1608 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1609 } 1610 } 1611 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1612 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1613 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1614 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1615 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1616 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1617 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1618 PetscCall(PetscFree3(work, rdest, cdest)); 1619 PetscCall(PetscFree(gcdest)); 1620 if (parcolp) PetscCall(ISDestroy(&colp)); 1621 *B = Aperm; 1622 PetscFunctionReturn(PETSC_SUCCESS); 1623 } 1624 1625 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1626 { 1627 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1628 1629 PetscFunctionBegin; 1630 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1631 if (ghosts) *ghosts = aij->garray; 1632 PetscFunctionReturn(PETSC_SUCCESS); 1633 } 1634 1635 
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1636 { 1637 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1638 Mat A = mat->A, B = mat->B; 1639 PetscLogDouble isend[5], irecv[5]; 1640 1641 PetscFunctionBegin; 1642 info->block_size = 1.0; 1643 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1644 1645 isend[0] = info->nz_used; 1646 isend[1] = info->nz_allocated; 1647 isend[2] = info->nz_unneeded; 1648 isend[3] = info->memory; 1649 isend[4] = info->mallocs; 1650 1651 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1652 1653 isend[0] += info->nz_used; 1654 isend[1] += info->nz_allocated; 1655 isend[2] += info->nz_unneeded; 1656 isend[3] += info->memory; 1657 isend[4] += info->mallocs; 1658 if (flag == MAT_LOCAL) { 1659 info->nz_used = isend[0]; 1660 info->nz_allocated = isend[1]; 1661 info->nz_unneeded = isend[2]; 1662 info->memory = isend[3]; 1663 info->mallocs = isend[4]; 1664 } else if (flag == MAT_GLOBAL_MAX) { 1665 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1666 1667 info->nz_used = irecv[0]; 1668 info->nz_allocated = irecv[1]; 1669 info->nz_unneeded = irecv[2]; 1670 info->memory = irecv[3]; 1671 info->mallocs = irecv[4]; 1672 } else if (flag == MAT_GLOBAL_SUM) { 1673 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1674 1675 info->nz_used = irecv[0]; 1676 info->nz_allocated = irecv[1]; 1677 info->nz_unneeded = irecv[2]; 1678 info->memory = irecv[3]; 1679 info->mallocs = irecv[4]; 1680 } 1681 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1682 info->fill_ratio_needed = 0; 1683 info->factor_mallocs = 0; 1684 PetscFunctionReturn(PETSC_SUCCESS); 1685 } 1686 1687 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1688 { 1689 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1690 1691 PetscFunctionBegin; 1692 switch (op) { 1693 case MAT_NEW_NONZERO_LOCATIONS: 1694 case MAT_NEW_NONZERO_ALLOCATION_ERR: 
1695 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1696 case MAT_KEEP_NONZERO_PATTERN: 1697 case MAT_NEW_NONZERO_LOCATION_ERR: 1698 case MAT_USE_INODES: 1699 case MAT_IGNORE_ZERO_ENTRIES: 1700 case MAT_FORM_EXPLICIT_TRANSPOSE: 1701 MatCheckPreallocated(A, 1); 1702 PetscCall(MatSetOption(a->A, op, flg)); 1703 PetscCall(MatSetOption(a->B, op, flg)); 1704 break; 1705 case MAT_ROW_ORIENTED: 1706 MatCheckPreallocated(A, 1); 1707 a->roworiented = flg; 1708 1709 PetscCall(MatSetOption(a->A, op, flg)); 1710 PetscCall(MatSetOption(a->B, op, flg)); 1711 break; 1712 case MAT_FORCE_DIAGONAL_ENTRIES: 1713 case MAT_SORTED_FULL: 1714 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1715 break; 1716 case MAT_IGNORE_OFF_PROC_ENTRIES: 1717 a->donotstash = flg; 1718 break; 1719 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1720 case MAT_SPD: 1721 case MAT_SYMMETRIC: 1722 case MAT_STRUCTURALLY_SYMMETRIC: 1723 case MAT_HERMITIAN: 1724 case MAT_SYMMETRY_ETERNAL: 1725 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1726 case MAT_SPD_ETERNAL: 1727 /* if the diagonal matrix is square it inherits some of the properties above */ 1728 break; 1729 case MAT_SUBMAT_SINGLEIS: 1730 A->submat_singleis = flg; 1731 break; 1732 case MAT_STRUCTURE_ONLY: 1733 /* The option is handled directly by MatSetOption() */ 1734 break; 1735 default: 1736 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1737 } 1738 PetscFunctionReturn(PETSC_SUCCESS); 1739 } 1740 1741 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1742 { 1743 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1744 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1745 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1746 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1747 PetscInt *cmap, *idx_p; 1748 1749 PetscFunctionBegin; 1750 PetscCheck(!mat->getrowactive, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1751 mat->getrowactive = PETSC_TRUE; 1752 1753 if (!mat->rowvalues && (idx || v)) { 1754 /* 1755 allocate enough space to hold information from the longest row. 1756 */ 1757 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1758 PetscInt max = 1, tmp; 1759 for (i = 0; i < matin->rmap->n; i++) { 1760 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1761 if (max < tmp) max = tmp; 1762 } 1763 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1764 } 1765 1766 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1767 lrow = row - rstart; 1768 1769 pvA = &vworkA; 1770 pcA = &cworkA; 1771 pvB = &vworkB; 1772 pcB = &cworkB; 1773 if (!v) { 1774 pvA = NULL; 1775 pvB = NULL; 1776 } 1777 if (!idx) { 1778 pcA = NULL; 1779 if (!v) pcB = NULL; 1780 } 1781 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1783 nztot = nzA + nzB; 1784 1785 cmap = mat->garray; 1786 if (v || idx) { 1787 if (nztot) { 1788 /* Sort by increasing column numbers, assuming A and B already sorted */ 1789 PetscInt imark = -1; 1790 if (v) { 1791 *v = v_p = mat->rowvalues; 1792 for (i = 0; i < nzB; i++) { 1793 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1794 else break; 1795 } 1796 imark = i; 1797 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1798 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1799 } 1800 if (idx) { 1801 *idx = idx_p = mat->rowindices; 1802 if (imark > -1) { 1803 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1804 } else { 1805 for (i = 0; i < nzB; i++) { 1806 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1807 else break; 1808 } 1809 imark = i; 1810 } 1811 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1812 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1813 } 1814 } 
else { 1815 if (idx) *idx = NULL; 1816 if (v) *v = NULL; 1817 } 1818 } 1819 *nz = nztot; 1820 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1821 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1822 PetscFunctionReturn(PETSC_SUCCESS); 1823 } 1824 1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1826 { 1827 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1828 1829 PetscFunctionBegin; 1830 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1831 aij->getrowactive = PETSC_FALSE; 1832 PetscFunctionReturn(PETSC_SUCCESS); 1833 } 1834 1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1838 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1839 PetscInt i, j, cstart = mat->cmap->rstart; 1840 PetscReal sum = 0.0; 1841 const MatScalar *v, *amata, *bmata; 1842 1843 PetscFunctionBegin; 1844 if (aij->size == 1) { 1845 PetscCall(MatNorm(aij->A, type, norm)); 1846 } else { 1847 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1848 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1849 if (type == NORM_FROBENIUS) { 1850 v = amata; 1851 for (i = 0; i < amat->nz; i++) { 1852 sum += PetscRealPart(PetscConj(*v) * (*v)); 1853 v++; 1854 } 1855 v = bmata; 1856 for (i = 0; i < bmat->nz; i++) { 1857 sum += PetscRealPart(PetscConj(*v) * (*v)); 1858 v++; 1859 } 1860 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1861 *norm = PetscSqrtReal(*norm); 1862 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1863 } else if (type == NORM_1) { /* max column norm */ 1864 PetscReal *tmp, *tmp2; 1865 PetscInt *jj, *garray = aij->garray; 1866 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1867 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1868 *norm = 0.0; 1869 v = amata; 
1870 jj = amat->j; 1871 for (j = 0; j < amat->nz; j++) { 1872 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1873 v++; 1874 } 1875 v = bmata; 1876 jj = bmat->j; 1877 for (j = 0; j < bmat->nz; j++) { 1878 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1879 v++; 1880 } 1881 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1882 for (j = 0; j < mat->cmap->N; j++) { 1883 if (tmp2[j] > *norm) *norm = tmp2[j]; 1884 } 1885 PetscCall(PetscFree(tmp)); 1886 PetscCall(PetscFree(tmp2)); 1887 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1888 } else if (type == NORM_INFINITY) { /* max row norm */ 1889 PetscReal ntemp = 0.0; 1890 for (j = 0; j < aij->A->rmap->n; j++) { 1891 v = amata + amat->i[j]; 1892 sum = 0.0; 1893 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1894 sum += PetscAbsScalar(*v); 1895 v++; 1896 } 1897 v = bmata + bmat->i[j]; 1898 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1899 sum += PetscAbsScalar(*v); 1900 v++; 1901 } 1902 if (sum > ntemp) ntemp = sum; 1903 } 1904 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1905 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1906 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1907 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1908 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1909 } 1910 PetscFunctionReturn(PETSC_SUCCESS); 1911 } 1912 1913 PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1914 { 1915 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1916 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1917 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1918 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1919 Mat B, A_diag, *B_diag; 1920 const MatScalar *pbv, *bv; 1921 1922 PetscFunctionBegin; 
1923 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1924 ma = A->rmap->n; 1925 na = A->cmap->n; 1926 mb = a->B->rmap->n; 1927 nb = a->B->cmap->n; 1928 ai = Aloc->i; 1929 aj = Aloc->j; 1930 bi = Bloc->i; 1931 bj = Bloc->j; 1932 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1933 PetscInt *d_nnz, *g_nnz, *o_nnz; 1934 PetscSFNode *oloc; 1935 PETSC_UNUSED PetscSF sf; 1936 1937 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1938 /* compute d_nnz for preallocation */ 1939 PetscCall(PetscArrayzero(d_nnz, na)); 1940 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1941 /* compute local off-diagonal contributions */ 1942 PetscCall(PetscArrayzero(g_nnz, nb)); 1943 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1944 /* map those to global */ 1945 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1946 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1947 PetscCall(PetscSFSetFromOptions(sf)); 1948 PetscCall(PetscArrayzero(o_nnz, na)); 1949 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1950 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1951 PetscCall(PetscSFDestroy(&sf)); 1952 1953 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1954 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1955 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1956 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1957 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1958 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1959 } else { 1960 B = *matout; 1961 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1962 } 1963 1964 b = (Mat_MPIAIJ *)B->data; 1965 A_diag = a->A; 1966 B_diag = &b->A; 1967 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1968 A_diag_ncol = A_diag->cmap->N; 1969 B_diag_ilen = sub_B_diag->ilen; 1970 B_diag_i = sub_B_diag->i; 1971 1972 /* Set 
ilen for diagonal of B */ 1973 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1974 1975 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 1976 very quickly (=without using MatSetValues), because all writes are local. */ 1977 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1978 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1979 1980 /* copy over the B part */ 1981 PetscCall(PetscMalloc1(bi[mb], &cols)); 1982 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1983 pbv = bv; 1984 row = A->rmap->rstart; 1985 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1986 cols_tmp = cols; 1987 for (i = 0; i < mb; i++) { 1988 ncol = bi[i + 1] - bi[i]; 1989 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1990 row++; 1991 pbv += ncol; 1992 cols_tmp += ncol; 1993 } 1994 PetscCall(PetscFree(cols)); 1995 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1996 1997 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1998 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1999 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2000 *matout = B; 2001 } else { 2002 PetscCall(MatHeaderMerge(A, &B)); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 2008 { 2009 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2010 Mat a = aij->A, b = aij->B; 2011 PetscInt s1, s2, s3; 2012 2013 PetscFunctionBegin; 2014 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 2015 if (rr) { 2016 PetscCall(VecGetLocalSize(rr, &s1)); 2017 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2018 /* Overlap communication with computation. 
*/ 2019 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2020 } 2021 if (ll) { 2022 PetscCall(VecGetLocalSize(ll, &s1)); 2023 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2024 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2025 } 2026 /* scale the diagonal block */ 2027 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2028 2029 if (rr) { 2030 /* Do a scatter end and then right scale the off-diagonal block */ 2031 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2032 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2033 } 2034 PetscFunctionReturn(PETSC_SUCCESS); 2035 } 2036 2037 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2038 { 2039 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2040 2041 PetscFunctionBegin; 2042 PetscCall(MatSetUnfactored(a->A)); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2047 { 2048 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2049 Mat a, b, c, d; 2050 PetscBool flg; 2051 2052 PetscFunctionBegin; 2053 a = matA->A; 2054 b = matA->B; 2055 c = matB->A; 2056 d = matB->B; 2057 2058 PetscCall(MatEqual(a, c, &flg)); 2059 if (flg) PetscCall(MatEqual(b, d, &flg)); 2060 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2061 PetscFunctionReturn(PETSC_SUCCESS); 2062 } 2063 2064 PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2065 { 2066 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2067 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2068 2069 PetscFunctionBegin; 2070 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2071 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2072 /* because of the column compression in the off-processor part of the matrix a->B, 2073 the number of columns in a->B and b->B may be different, hence we cannot call 2074 the MatCopy() directly on the two parts. If need be, we can provide a more 2075 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2076 then copying the submatrices */ 2077 PetscCall(MatCopy_Basic(A, B, str)); 2078 } else { 2079 PetscCall(MatCopy(a->A, b->A, str)); 2080 PetscCall(MatCopy(a->B, b->B, str)); 2081 } 2082 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 /* 2087 Computes the number of nonzeros per row needed for preallocation when X and Y 2088 have different nonzero structure. 2089 */ 2090 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2091 { 2092 PetscInt i, j, k, nzx, nzy; 2093 2094 PetscFunctionBegin; 2095 /* Set the number of nonzeros in the new matrix */ 2096 for (i = 0; i < m; i++) { 2097 const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i]; 2098 nzx = xi[i + 1] - xi[i]; 2099 nzy = yi[i + 1] - yi[i]; 2100 nnz[i] = 0; 2101 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2102 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2103 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2104 nnz[i]++; 2105 } 2106 for (; k < nzy; k++) nnz[i]++; 2107 } 2108 PetscFunctionReturn(PETSC_SUCCESS); 2109 } 2110 2111 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2112 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2113 { 2114 PetscInt m = Y->rmap->N; 2115 Mat_SeqAIJ *x = 
(Mat_SeqAIJ *)X->data; 2116 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2117 2118 PetscFunctionBegin; 2119 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2120 PetscFunctionReturn(PETSC_SUCCESS); 2121 } 2122 2123 PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2124 { 2125 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2126 2127 PetscFunctionBegin; 2128 if (str == SAME_NONZERO_PATTERN) { 2129 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2130 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2131 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2132 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2133 } else { 2134 Mat B; 2135 PetscInt *nnz_d, *nnz_o; 2136 2137 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2138 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2139 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2140 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2141 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2142 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2143 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2144 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2145 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2146 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2147 PetscCall(MatHeaderMerge(Y, &B)); 2148 PetscCall(PetscFree(nnz_d)); 2149 PetscCall(PetscFree(nnz_o)); 2150 } 2151 PetscFunctionReturn(PETSC_SUCCESS); 2152 } 2153 2154 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2155 2156 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2157 { 2158 PetscFunctionBegin; 2159 if (PetscDefined(USE_COMPLEX)) { 2160 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2161 2162 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2163 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2164 } 2165 PetscFunctionReturn(PETSC_SUCCESS); 2166 } 2167 2168 PetscErrorCode 
MatRealPart_MPIAIJ(Mat A) 2169 { 2170 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2171 2172 PetscFunctionBegin; 2173 PetscCall(MatRealPart(a->A)); 2174 PetscCall(MatRealPart(a->B)); 2175 PetscFunctionReturn(PETSC_SUCCESS); 2176 } 2177 2178 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2179 { 2180 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2181 2182 PetscFunctionBegin; 2183 PetscCall(MatImaginaryPart(a->A)); 2184 PetscCall(MatImaginaryPart(a->B)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2189 { 2190 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2191 PetscInt i, *idxb = NULL, m = A->rmap->n; 2192 PetscScalar *va, *vv; 2193 Vec vB, vA; 2194 const PetscScalar *vb; 2195 2196 PetscFunctionBegin; 2197 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2198 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2199 2200 PetscCall(VecGetArrayWrite(vA, &va)); 2201 if (idx) { 2202 for (i = 0; i < m; i++) { 2203 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2204 } 2205 } 2206 2207 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2208 PetscCall(PetscMalloc1(m, &idxb)); 2209 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2210 2211 PetscCall(VecGetArrayWrite(v, &vv)); 2212 PetscCall(VecGetArrayRead(vB, &vb)); 2213 for (i = 0; i < m; i++) { 2214 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2215 vv[i] = vb[i]; 2216 if (idx) idx[i] = a->garray[idxb[i]]; 2217 } else { 2218 vv[i] = va[i]; 2219 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2220 } 2221 } 2222 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2223 PetscCall(VecRestoreArrayWrite(vA, &va)); 2224 PetscCall(VecRestoreArrayRead(vB, &vb)); 2225 PetscCall(PetscFree(idxb)); 2226 PetscCall(VecDestroy(&vA)); 2227 PetscCall(VecDestroy(&vB)); 2228 PetscFunctionReturn(PETSC_SUCCESS); 2229 } 2230 2231 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2232 { 2233 
Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2234 PetscInt m = A->rmap->n, n = A->cmap->n; 2235 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2236 PetscInt *cmap = mat->garray; 2237 PetscInt *diagIdx, *offdiagIdx; 2238 Vec diagV, offdiagV; 2239 PetscScalar *a, *diagA, *offdiagA; 2240 const PetscScalar *ba, *bav; 2241 PetscInt r, j, col, ncols, *bi, *bj; 2242 Mat B = mat->B; 2243 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2244 2245 PetscFunctionBegin; 2246 /* When a process holds entire A and other processes have no entry */ 2247 if (A->cmap->N == n) { 2248 PetscCall(VecGetArrayWrite(v, &diagA)); 2249 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2251 PetscCall(VecDestroy(&diagV)); 2252 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2253 PetscFunctionReturn(PETSC_SUCCESS); 2254 } else if (n == 0) { 2255 if (m) { 2256 PetscCall(VecGetArrayWrite(v, &a)); 2257 for (r = 0; r < m; r++) { 2258 a[r] = 0.0; 2259 if (idx) idx[r] = -1; 2260 } 2261 PetscCall(VecRestoreArrayWrite(v, &a)); 2262 } 2263 PetscFunctionReturn(PETSC_SUCCESS); 2264 } 2265 2266 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2267 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2268 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2269 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2270 2271 /* Get offdiagIdx[] for implicit 0.0 */ 2272 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2273 ba = bav; 2274 bi = b->i; 2275 bj = b->j; 2276 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2277 for (r = 0; r < m; r++) { 2278 ncols = bi[r + 1] - bi[r]; 2279 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2280 offdiagA[r] = *ba; 2281 offdiagIdx[r] = cmap[0]; 2282 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2283 offdiagA[r] = 0.0; 2284 2285 /* Find first hole in the cmap */ 2286 for (j = 0; j < ncols; j++) { 2287 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2288 if 
(col > j && j < cstart) { 2289 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2290 break; 2291 } else if (col > j + n && j >= cstart) { 2292 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2293 break; 2294 } 2295 } 2296 if (j == ncols && ncols < A->cmap->N - n) { 2297 /* a hole is outside compressed Bcols */ 2298 if (ncols == 0) { 2299 if (cstart) { 2300 offdiagIdx[r] = 0; 2301 } else offdiagIdx[r] = cend; 2302 } else { /* ncols > 0 */ 2303 offdiagIdx[r] = cmap[ncols - 1] + 1; 2304 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2305 } 2306 } 2307 } 2308 2309 for (j = 0; j < ncols; j++) { 2310 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2311 offdiagA[r] = *ba; 2312 offdiagIdx[r] = cmap[*bj]; 2313 } 2314 ba++; 2315 bj++; 2316 } 2317 } 2318 2319 PetscCall(VecGetArrayWrite(v, &a)); 2320 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2321 for (r = 0; r < m; ++r) { 2322 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2323 a[r] = diagA[r]; 2324 if (idx) idx[r] = cstart + diagIdx[r]; 2325 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2326 a[r] = diagA[r]; 2327 if (idx) { 2328 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2329 idx[r] = cstart + diagIdx[r]; 2330 } else idx[r] = offdiagIdx[r]; 2331 } 2332 } else { 2333 a[r] = offdiagA[r]; 2334 if (idx) idx[r] = offdiagIdx[r]; 2335 } 2336 } 2337 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2338 PetscCall(VecRestoreArrayWrite(v, &a)); 2339 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2340 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2341 PetscCall(VecDestroy(&diagV)); 2342 PetscCall(VecDestroy(&offdiagV)); 2343 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2344 PetscFunctionReturn(PETSC_SUCCESS); 2345 } 2346 2347 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2348 { 2349 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2350 PetscInt m = A->rmap->n, n = A->cmap->n; 2351 
PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2352 PetscInt *cmap = mat->garray; 2353 PetscInt *diagIdx, *offdiagIdx; 2354 Vec diagV, offdiagV; 2355 PetscScalar *a, *diagA, *offdiagA; 2356 const PetscScalar *ba, *bav; 2357 PetscInt r, j, col, ncols, *bi, *bj; 2358 Mat B = mat->B; 2359 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2360 2361 PetscFunctionBegin; 2362 /* When a process holds entire A and other processes have no entry */ 2363 if (A->cmap->N == n) { 2364 PetscCall(VecGetArrayWrite(v, &diagA)); 2365 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2366 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2367 PetscCall(VecDestroy(&diagV)); 2368 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2369 PetscFunctionReturn(PETSC_SUCCESS); 2370 } else if (n == 0) { 2371 if (m) { 2372 PetscCall(VecGetArrayWrite(v, &a)); 2373 for (r = 0; r < m; r++) { 2374 a[r] = PETSC_MAX_REAL; 2375 if (idx) idx[r] = -1; 2376 } 2377 PetscCall(VecRestoreArrayWrite(v, &a)); 2378 } 2379 PetscFunctionReturn(PETSC_SUCCESS); 2380 } 2381 2382 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2383 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2384 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2385 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2386 2387 /* Get offdiagIdx[] for implicit 0.0 */ 2388 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2389 ba = bav; 2390 bi = b->i; 2391 bj = b->j; 2392 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2393 for (r = 0; r < m; r++) { 2394 ncols = bi[r + 1] - bi[r]; 2395 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2396 offdiagA[r] = *ba; 2397 offdiagIdx[r] = cmap[0]; 2398 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2399 offdiagA[r] = 0.0; 2400 2401 /* Find first hole in the cmap */ 2402 for (j = 0; j < ncols; j++) { 2403 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2404 if (col > j && j < cstart) { 2405 offdiagIdx[r] = j; /* global column number of first 
implicit 0.0 */ 2406 break; 2407 } else if (col > j + n && j >= cstart) { 2408 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2409 break; 2410 } 2411 } 2412 if (j == ncols && ncols < A->cmap->N - n) { 2413 /* a hole is outside compressed Bcols */ 2414 if (ncols == 0) { 2415 if (cstart) { 2416 offdiagIdx[r] = 0; 2417 } else offdiagIdx[r] = cend; 2418 } else { /* ncols > 0 */ 2419 offdiagIdx[r] = cmap[ncols - 1] + 1; 2420 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2421 } 2422 } 2423 } 2424 2425 for (j = 0; j < ncols; j++) { 2426 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2427 offdiagA[r] = *ba; 2428 offdiagIdx[r] = cmap[*bj]; 2429 } 2430 ba++; 2431 bj++; 2432 } 2433 } 2434 2435 PetscCall(VecGetArrayWrite(v, &a)); 2436 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2437 for (r = 0; r < m; ++r) { 2438 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 if (idx) idx[r] = cstart + diagIdx[r]; 2441 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2442 a[r] = diagA[r]; 2443 if (idx) { 2444 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2445 idx[r] = cstart + diagIdx[r]; 2446 } else idx[r] = offdiagIdx[r]; 2447 } 2448 } else { 2449 a[r] = offdiagA[r]; 2450 if (idx) idx[r] = offdiagIdx[r]; 2451 } 2452 } 2453 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2454 PetscCall(VecRestoreArrayWrite(v, &a)); 2455 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2456 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2457 PetscCall(VecDestroy(&diagV)); 2458 PetscCall(VecDestroy(&offdiagV)); 2459 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2460 PetscFunctionReturn(PETSC_SUCCESS); 2461 } 2462 2463 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2464 { 2465 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2466 PetscInt m = A->rmap->n, n = A->cmap->n; 2467 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2468 PetscInt *cmap = mat->garray; 
2469 PetscInt *diagIdx, *offdiagIdx; 2470 Vec diagV, offdiagV; 2471 PetscScalar *a, *diagA, *offdiagA; 2472 const PetscScalar *ba, *bav; 2473 PetscInt r, j, col, ncols, *bi, *bj; 2474 Mat B = mat->B; 2475 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2476 2477 PetscFunctionBegin; 2478 /* When a process holds entire A and other processes have no entry */ 2479 if (A->cmap->N == n) { 2480 PetscCall(VecGetArrayWrite(v, &diagA)); 2481 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2482 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2483 PetscCall(VecDestroy(&diagV)); 2484 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2485 PetscFunctionReturn(PETSC_SUCCESS); 2486 } else if (n == 0) { 2487 if (m) { 2488 PetscCall(VecGetArrayWrite(v, &a)); 2489 for (r = 0; r < m; r++) { 2490 a[r] = PETSC_MIN_REAL; 2491 if (idx) idx[r] = -1; 2492 } 2493 PetscCall(VecRestoreArrayWrite(v, &a)); 2494 } 2495 PetscFunctionReturn(PETSC_SUCCESS); 2496 } 2497 2498 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2499 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2500 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2501 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2502 2503 /* Get offdiagIdx[] for implicit 0.0 */ 2504 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2505 ba = bav; 2506 bi = b->i; 2507 bj = b->j; 2508 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2509 for (r = 0; r < m; r++) { 2510 ncols = bi[r + 1] - bi[r]; 2511 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2512 offdiagA[r] = *ba; 2513 offdiagIdx[r] = cmap[0]; 2514 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2515 offdiagA[r] = 0.0; 2516 2517 /* Find first hole in the cmap */ 2518 for (j = 0; j < ncols; j++) { 2519 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2520 if (col > j && j < cstart) { 2521 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2522 break; 2523 } else if (col > j + n && j >= cstart) { 2524 offdiagIdx[r] = 
j + n; /* global column number of first implicit 0.0 */ 2525 break; 2526 } 2527 } 2528 if (j == ncols && ncols < A->cmap->N - n) { 2529 /* a hole is outside compressed Bcols */ 2530 if (ncols == 0) { 2531 if (cstart) { 2532 offdiagIdx[r] = 0; 2533 } else offdiagIdx[r] = cend; 2534 } else { /* ncols > 0 */ 2535 offdiagIdx[r] = cmap[ncols - 1] + 1; 2536 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2537 } 2538 } 2539 } 2540 2541 for (j = 0; j < ncols; j++) { 2542 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2543 offdiagA[r] = *ba; 2544 offdiagIdx[r] = cmap[*bj]; 2545 } 2546 ba++; 2547 bj++; 2548 } 2549 } 2550 2551 PetscCall(VecGetArrayWrite(v, &a)); 2552 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2553 for (r = 0; r < m; ++r) { 2554 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) idx[r] = cstart + diagIdx[r]; 2557 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2558 a[r] = diagA[r]; 2559 if (idx) { 2560 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2561 idx[r] = cstart + diagIdx[r]; 2562 } else idx[r] = offdiagIdx[r]; 2563 } 2564 } else { 2565 a[r] = offdiagA[r]; 2566 if (idx) idx[r] = offdiagIdx[r]; 2567 } 2568 } 2569 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2570 PetscCall(VecRestoreArrayWrite(v, &a)); 2571 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2572 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2573 PetscCall(VecDestroy(&diagV)); 2574 PetscCall(VecDestroy(&offdiagV)); 2575 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2576 PetscFunctionReturn(PETSC_SUCCESS); 2577 } 2578 2579 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2580 { 2581 Mat *dummy; 2582 2583 PetscFunctionBegin; 2584 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2585 *newmat = *dummy; 2586 PetscCall(PetscFree(dummy)); 2587 PetscFunctionReturn(PETSC_SUCCESS); 2588 } 2589 2590 PetscErrorCode 
MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2591 { 2592 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2593 2594 PetscFunctionBegin; 2595 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2596 A->factorerrortype = a->A->factorerrortype; 2597 PetscFunctionReturn(PETSC_SUCCESS); 2598 } 2599 2600 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2601 { 2602 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2603 2604 PetscFunctionBegin; 2605 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2606 PetscCall(MatSetRandom(aij->A, rctx)); 2607 if (x->assembled) { 2608 PetscCall(MatSetRandom(aij->B, rctx)); 2609 } else { 2610 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2611 } 2612 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2613 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2614 PetscFunctionReturn(PETSC_SUCCESS); 2615 } 2616 2617 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2618 { 2619 PetscFunctionBegin; 2620 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2621 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2622 PetscFunctionReturn(PETSC_SUCCESS); 2623 } 2624 2625 /*@ 2626 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2627 2628 Not Collective 2629 2630 Input Parameter: 2631 . A - the matrix 2632 2633 Output Parameter: 2634 . 
nz - the number of nonzeros 2635 2636 Level: advanced 2637 2638 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `Mat` 2639 @*/ 2640 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2641 { 2642 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2643 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2644 2645 PetscFunctionBegin; 2646 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 /*@ 2651 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2652 2653 Collective 2654 2655 Input Parameters: 2656 + A - the matrix 2657 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2658 2659 Level: advanced 2660 2661 .seealso: [](chapter_matrices), `Mat`, `Mat`, `MATMPIAIJ` 2662 @*/ 2663 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2664 { 2665 PetscFunctionBegin; 2666 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2667 PetscFunctionReturn(PETSC_SUCCESS); 2668 } 2669 2670 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2671 { 2672 PetscBool sc = PETSC_FALSE, flg; 2673 2674 PetscFunctionBegin; 2675 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2676 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2677 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2678 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2679 PetscOptionsHeadEnd(); 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2684 { 2685 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2686 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2687 2688 PetscFunctionBegin; 2689 if 
(!Y->preallocated) { 2690 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2691 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2692 PetscInt nonew = aij->nonew; 2693 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2694 aij->nonew = nonew; 2695 } 2696 PetscCall(MatShift_Basic(Y, a)); 2697 PetscFunctionReturn(PETSC_SUCCESS); 2698 } 2699 2700 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2701 { 2702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2703 2704 PetscFunctionBegin; 2705 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2706 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2707 if (d) { 2708 PetscInt rstart; 2709 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2710 *d += rstart; 2711 } 2712 PetscFunctionReturn(PETSC_SUCCESS); 2713 } 2714 2715 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2716 { 2717 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2718 2719 PetscFunctionBegin; 2720 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2725 { 2726 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2727 2728 PetscFunctionBegin; 2729 PetscCall(MatEliminateZeros(a->A)); 2730 PetscCall(MatEliminateZeros(a->B)); 2731 PetscFunctionReturn(PETSC_SUCCESS); 2732 } 2733 2734 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2735 MatGetRow_MPIAIJ, 2736 MatRestoreRow_MPIAIJ, 2737 MatMult_MPIAIJ, 2738 /* 4*/ MatMultAdd_MPIAIJ, 2739 MatMultTranspose_MPIAIJ, 2740 MatMultTransposeAdd_MPIAIJ, 2741 NULL, 2742 NULL, 2743 NULL, 2744 /*10*/ NULL, 2745 NULL, 2746 NULL, 2747 MatSOR_MPIAIJ, 2748 MatTranspose_MPIAIJ, 2749 /*15*/ MatGetInfo_MPIAIJ, 2750 MatEqual_MPIAIJ, 2751 MatGetDiagonal_MPIAIJ, 2752 
MatDiagonalScale_MPIAIJ, 2753 MatNorm_MPIAIJ, 2754 /*20*/ MatAssemblyBegin_MPIAIJ, 2755 MatAssemblyEnd_MPIAIJ, 2756 MatSetOption_MPIAIJ, 2757 MatZeroEntries_MPIAIJ, 2758 /*24*/ MatZeroRows_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*29*/ MatSetUp_MPI_Hash, 2764 NULL, 2765 NULL, 2766 MatGetDiagonalBlock_MPIAIJ, 2767 NULL, 2768 /*34*/ MatDuplicate_MPIAIJ, 2769 NULL, 2770 NULL, 2771 NULL, 2772 NULL, 2773 /*39*/ MatAXPY_MPIAIJ, 2774 MatCreateSubMatrices_MPIAIJ, 2775 MatIncreaseOverlap_MPIAIJ, 2776 MatGetValues_MPIAIJ, 2777 MatCopy_MPIAIJ, 2778 /*44*/ MatGetRowMax_MPIAIJ, 2779 MatScale_MPIAIJ, 2780 MatShift_MPIAIJ, 2781 MatDiagonalSet_MPIAIJ, 2782 MatZeroRowsColumns_MPIAIJ, 2783 /*49*/ MatSetRandom_MPIAIJ, 2784 MatGetRowIJ_MPIAIJ, 2785 MatRestoreRowIJ_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*54*/ MatFDColoringCreate_MPIXAIJ, 2789 NULL, 2790 MatSetUnfactored_MPIAIJ, 2791 MatPermute_MPIAIJ, 2792 NULL, 2793 /*59*/ MatCreateSubMatrix_MPIAIJ, 2794 MatDestroy_MPIAIJ, 2795 MatView_MPIAIJ, 2796 NULL, 2797 NULL, 2798 /*64*/ NULL, 2799 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2800 NULL, 2801 NULL, 2802 NULL, 2803 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2804 MatGetRowMinAbs_MPIAIJ, 2805 NULL, 2806 NULL, 2807 NULL, 2808 NULL, 2809 /*75*/ MatFDColoringApply_AIJ, 2810 MatSetFromOptions_MPIAIJ, 2811 NULL, 2812 NULL, 2813 MatFindZeroDiagonals_MPIAIJ, 2814 /*80*/ NULL, 2815 NULL, 2816 NULL, 2817 /*83*/ MatLoad_MPIAIJ, 2818 MatIsSymmetric_MPIAIJ, 2819 NULL, 2820 NULL, 2821 NULL, 2822 NULL, 2823 /*89*/ NULL, 2824 NULL, 2825 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 NULL, 2828 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2829 NULL, 2830 NULL, 2831 NULL, 2832 MatBindToCPU_MPIAIJ, 2833 /*99*/ MatProductSetFromOptions_MPIAIJ, 2834 NULL, 2835 NULL, 2836 MatConjugate_MPIAIJ, 2837 NULL, 2838 /*104*/ MatSetValuesRow_MPIAIJ, 2839 MatRealPart_MPIAIJ, 2840 MatImaginaryPart_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*109*/ NULL, 2844 NULL, 2845 MatGetRowMin_MPIAIJ, 2846 NULL, 2847 
MatMissingDiagonal_MPIAIJ, 2848 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2849 NULL, 2850 MatGetGhosts_MPIAIJ, 2851 NULL, 2852 NULL, 2853 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2854 NULL, 2855 NULL, 2856 NULL, 2857 MatGetMultiProcBlock_MPIAIJ, 2858 /*124*/ MatFindNonzeroRows_MPIAIJ, 2859 MatGetColumnReductions_MPIAIJ, 2860 MatInvertBlockDiagonal_MPIAIJ, 2861 MatInvertVariableBlockDiagonal_MPIAIJ, 2862 MatCreateSubMatricesMPI_MPIAIJ, 2863 /*129*/ NULL, 2864 NULL, 2865 NULL, 2866 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2867 NULL, 2868 /*134*/ NULL, 2869 NULL, 2870 NULL, 2871 NULL, 2872 NULL, 2873 /*139*/ MatSetBlockSizes_MPIAIJ, 2874 NULL, 2875 NULL, 2876 MatFDColoringSetUp_MPIXAIJ, 2877 MatFindOffBlockDiagonalEntries_MPIAIJ, 2878 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2879 /*145*/ NULL, 2880 NULL, 2881 NULL, 2882 MatCreateGraph_Simple_AIJ, 2883 NULL, 2884 /*150*/ NULL, 2885 MatEliminateZeros_MPIAIJ}; 2886 2887 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2888 { 2889 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2890 2891 PetscFunctionBegin; 2892 PetscCall(MatStoreValues(aij->A)); 2893 PetscCall(MatStoreValues(aij->B)); 2894 PetscFunctionReturn(PETSC_SUCCESS); 2895 } 2896 2897 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2898 { 2899 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2900 2901 PetscFunctionBegin; 2902 PetscCall(MatRetrieveValues(aij->A)); 2903 PetscCall(MatRetrieveValues(aij->B)); 2904 PetscFunctionReturn(PETSC_SUCCESS); 2905 } 2906 2907 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2908 { 2909 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2910 PetscMPIInt size; 2911 2912 PetscFunctionBegin; 2913 if (B->hash_active) { 2914 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2915 B->hash_active = PETSC_FALSE; 2916 } 2917 PetscCall(PetscLayoutSetUp(B->rmap)); 2918 PetscCall(PetscLayoutSetUp(B->cmap)); 2919 2920 #if defined(PETSC_USE_CTABLE) 2921 
PetscCall(PetscHMapIDestroy(&b->colmap)); 2922 #else 2923 PetscCall(PetscFree(b->colmap)); 2924 #endif 2925 PetscCall(PetscFree(b->garray)); 2926 PetscCall(VecDestroy(&b->lvec)); 2927 PetscCall(VecScatterDestroy(&b->Mvctx)); 2928 2929 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2930 PetscCall(MatDestroy(&b->B)); 2931 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2932 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2933 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2934 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2935 2936 PetscCall(MatDestroy(&b->A)); 2937 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2938 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2939 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2940 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2941 2942 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2943 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2944 B->preallocated = PETSC_TRUE; 2945 B->was_assembled = PETSC_FALSE; 2946 B->assembled = PETSC_FALSE; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2951 { 2952 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2953 2954 PetscFunctionBegin; 2955 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2956 PetscCall(PetscLayoutSetUp(B->rmap)); 2957 PetscCall(PetscLayoutSetUp(B->cmap)); 2958 2959 #if defined(PETSC_USE_CTABLE) 2960 PetscCall(PetscHMapIDestroy(&b->colmap)); 2961 #else 2962 PetscCall(PetscFree(b->colmap)); 2963 #endif 2964 PetscCall(PetscFree(b->garray)); 2965 PetscCall(VecDestroy(&b->lvec)); 2966 PetscCall(VecScatterDestroy(&b->Mvctx)); 2967 2968 PetscCall(MatResetPreallocation(b->A)); 2969 PetscCall(MatResetPreallocation(b->B)); 2970 B->preallocated = PETSC_TRUE; 2971 B->was_assembled = PETSC_FALSE; 2972 B->assembled = PETSC_FALSE; 2973 PetscFunctionReturn(PETSC_SUCCESS); 2974 } 2975 2976 PetscErrorCode 
MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2977 { 2978 Mat mat; 2979 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2980 2981 PetscFunctionBegin; 2982 *newmat = NULL; 2983 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2984 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2985 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2986 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2987 a = (Mat_MPIAIJ *)mat->data; 2988 2989 mat->factortype = matin->factortype; 2990 mat->assembled = matin->assembled; 2991 mat->insertmode = NOT_SET_VALUES; 2992 mat->preallocated = matin->preallocated; 2993 2994 a->size = oldmat->size; 2995 a->rank = oldmat->rank; 2996 a->donotstash = oldmat->donotstash; 2997 a->roworiented = oldmat->roworiented; 2998 a->rowindices = NULL; 2999 a->rowvalues = NULL; 3000 a->getrowactive = PETSC_FALSE; 3001 3002 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3003 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3004 3005 if (oldmat->colmap) { 3006 #if defined(PETSC_USE_CTABLE) 3007 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3008 #else 3009 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3010 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3011 #endif 3012 } else a->colmap = NULL; 3013 if (oldmat->garray) { 3014 PetscInt len; 3015 len = oldmat->B->cmap->n; 3016 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3017 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3018 } else a->garray = NULL; 3019 3020 /* It may happen MatDuplicate is called with a non-assembled matrix 3021 In fact, MatDuplicate only requires the matrix to be preallocated 3022 This may happen inside a DMCreateMatrix_Shell */ 3023 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3024 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3025 
PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3026 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3027 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3028 *newmat = mat; 3029 PetscFunctionReturn(PETSC_SUCCESS); 3030 } 3031 3032 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3033 { 3034 PetscBool isbinary, ishdf5; 3035 3036 PetscFunctionBegin; 3037 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3038 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3039 /* force binary viewer to load .info file if it has not yet done so */ 3040 PetscCall(PetscViewerSetUp(viewer)); 3041 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3042 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3043 if (isbinary) { 3044 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3045 } else if (ishdf5) { 3046 #if defined(PETSC_HAVE_HDF5) 3047 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3048 #else 3049 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3050 #endif 3051 } else { 3052 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3053 } 3054 PetscFunctionReturn(PETSC_SUCCESS); 3055 } 3056 3057 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3058 { 3059 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3060 PetscInt *rowidxs, *colidxs; 3061 PetscScalar *matvals; 3062 3063 PetscFunctionBegin; 3064 PetscCall(PetscViewerSetUp(viewer)); 3065 3066 /* read in matrix header */ 3067 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3068 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3069 M = 
header[1]; 3070 N = header[2]; 3071 nz = header[3]; 3072 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3073 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3074 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3075 3076 /* set block sizes from the viewer's .info file */ 3077 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3078 /* set global sizes if not set already */ 3079 if (mat->rmap->N < 0) mat->rmap->N = M; 3080 if (mat->cmap->N < 0) mat->cmap->N = N; 3081 PetscCall(PetscLayoutSetUp(mat->rmap)); 3082 PetscCall(PetscLayoutSetUp(mat->cmap)); 3083 3084 /* check if the matrix sizes are correct */ 3085 PetscCall(MatGetSize(mat, &rows, &cols)); 3086 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3087 3088 /* read in row lengths and build row indices */ 3089 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3090 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3091 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3092 rowidxs[0] = 0; 3093 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3094 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3095 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3096 /* read in column indices and matrix values */ 3097 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, 
colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3100 /* store matrix indices and values */ 3101 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3102 PetscCall(PetscFree(rowidxs)); 3103 PetscCall(PetscFree2(colidxs, matvals)); 3104 PetscFunctionReturn(PETSC_SUCCESS); 3105 } 3106 3107 /* Not scalable because of ISAllGather() unless getting all columns. */ 3108 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3109 { 3110 IS iscol_local; 3111 PetscBool isstride; 3112 PetscMPIInt lisstride = 0, gisstride; 3113 3114 PetscFunctionBegin; 3115 /* check if we are grabbing all columns*/ 3116 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3117 3118 if (isstride) { 3119 PetscInt start, len, mstart, mlen; 3120 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3121 PetscCall(ISGetLocalSize(iscol, &len)); 3122 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3123 if (mstart == start && mlen - mstart == len) lisstride = 1; 3124 } 3125 3126 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3127 if (gisstride) { 3128 PetscInt N; 3129 PetscCall(MatGetSize(mat, NULL, &N)); 3130 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3131 PetscCall(ISSetIdentity(iscol_local)); 3132 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3133 } else { 3134 PetscInt cbs; 3135 PetscCall(ISGetBlockSize(iscol, &cbs)); 3136 PetscCall(ISAllGather(iscol, &iscol_local)); 3137 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3138 } 3139 3140 *isseq = iscol_local; 3141 PetscFunctionReturn(PETSC_SUCCESS); 3142 } 3143 3144 /* 3145 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3146 (see 
MatCreateSubMatrix_MPIAIJ_nonscalable) 3147 3148 Input Parameters: 3149 + mat - matrix 3150 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3151 i.e., mat->rstart <= isrow[i] < mat->rend 3152 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3153 i.e., mat->cstart <= iscol[i] < mat->cend 3154 3155 Output Parameters: 3156 + isrow_d - sequential row index set for retrieving mat->A 3157 . iscol_d - sequential column index set for retrieving mat->A 3158 . iscol_o - sequential column index set for retrieving mat->B 3159 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3160 */ 3161 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3162 { 3163 Vec x, cmap; 3164 const PetscInt *is_idx; 3165 PetscScalar *xarray, *cmaparray; 3166 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3168 Mat B = a->B; 3169 Vec lvec = a->lvec, lcmap; 3170 PetscInt i, cstart, cend, Bn = B->cmap->N; 3171 MPI_Comm comm; 3172 VecScatter Mvctx = a->Mvctx; 3173 3174 PetscFunctionBegin; 3175 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3176 PetscCall(ISGetLocalSize(iscol, &ncols)); 3177 3178 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3179 PetscCall(MatCreateVecs(mat, &x, NULL)); 3180 PetscCall(VecSet(x, -1.0)); 3181 PetscCall(VecDuplicate(x, &cmap)); 3182 PetscCall(VecSet(cmap, -1.0)); 3183 3184 /* Get start indices */ 3185 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3186 isstart -= ncols; 3187 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3188 3189 PetscCall(ISGetIndices(iscol, &is_idx)); 3190 PetscCall(VecGetArray(x, &xarray)); 3191 PetscCall(VecGetArray(cmap, &cmaparray)); 3192 PetscCall(PetscMalloc1(ncols, &idx)); 3193 for (i = 0; i < ncols; i++) { 3194 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3195 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3196 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3197 } 3198 PetscCall(VecRestoreArray(x, &xarray)); 3199 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3200 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3201 3202 /* Get iscol_d */ 3203 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3204 PetscCall(ISGetBlockSize(iscol, &i)); 3205 PetscCall(ISSetBlockSize(*iscol_d, i)); 3206 3207 /* Get isrow_d */ 3208 PetscCall(ISGetLocalSize(isrow, &m)); 3209 rstart = mat->rmap->rstart; 3210 PetscCall(PetscMalloc1(m, &idx)); 3211 PetscCall(ISGetIndices(isrow, &is_idx)); 3212 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3213 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3214 3215 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3216 PetscCall(ISGetBlockSize(isrow, &i)); 3217 PetscCall(ISSetBlockSize(*isrow_d, i)); 3218 3219 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3220 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3221 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3222 3223 PetscCall(VecDuplicate(lvec, &lcmap)); 3224 3225 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3226 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3227 3228 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3229 /* off-process column indices */ 3230 count = 0; 3231 PetscCall(PetscMalloc1(Bn, &idx)); 3232 PetscCall(PetscMalloc1(Bn, &cmap1)); 3233 3234 PetscCall(VecGetArray(lvec, &xarray)); 3235 PetscCall(VecGetArray(lcmap, &cmaparray)); 3236 for (i = 0; i < Bn; i++) { 3237 if (PetscRealPart(xarray[i]) > -1.0) { 3238 idx[count] = i; /* local column index in off-diagonal part B */ 3239 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3240 count++; 3241 } 3242 } 3243 PetscCall(VecRestoreArray(lvec, &xarray)); 3244 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3245 3246 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3247 /* cannot ensure iscol_o has same blocksize as iscol! */ 3248 3249 PetscCall(PetscFree(idx)); 3250 *garray = cmap1; 3251 3252 PetscCall(VecDestroy(&x)); 3253 PetscCall(VecDestroy(&cmap)); 3254 PetscCall(VecDestroy(&lcmap)); 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3259 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3260 { 3261 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3262 Mat M = NULL; 3263 MPI_Comm comm; 3264 IS iscol_d, isrow_d, iscol_o; 3265 Mat Asub = NULL, Bsub = NULL; 3266 PetscInt n; 3267 3268 PetscFunctionBegin; 3269 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3270 3271 if (call == MAT_REUSE_MATRIX) { 3272 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3274 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3277 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3280 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3281 3282 /* Update diagonal and off-diagonal portions of submat */ 3283 asub = (Mat_MPIAIJ *)(*submat)->data; 3284 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3285 PetscCall(ISGetLocalSize(iscol_o, &n)); 3286 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3287 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3288 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3289 3290 } else { /* call == MAT_INITIAL_MATRIX) */ 3291 const PetscInt *garray; 3292 PetscInt BsubN; 3293 3294 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3295 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3296 3297 /* Create local submatrices Asub and Bsub */ 3298 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3299 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3300 3301 /* Create submatrix M */ 3302 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3303 3304 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3305 asub = (Mat_MPIAIJ *)M->data; 3306 3307 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3308 n = asub->B->cmap->N; 3309 if (BsubN > n) { 3310 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3311 const PetscInt *idx; 3312 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3313 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3314 3315 PetscCall(PetscMalloc1(n, &idx_new)); 3316 j = 0; 3317 PetscCall(ISGetIndices(iscol_o, &idx)); 3318 for (i = 0; i < n; i++) { 3319 if (j >= BsubN) break; 3320 while (subgarray[i] > garray[j]) j++; 3321 3322 if (subgarray[i] == garray[j]) { 3323 idx_new[i] = idx[j++]; 3324 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3325 } 3326 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3327 3328 PetscCall(ISDestroy(&iscol_o)); 3329 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3330 3331 } else if (BsubN < n) { 3332 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3333 } 3334 3335 PetscCall(PetscFree(garray)); 3336 *submat = M; 3337 3338 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */ 3339 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3340 PetscCall(ISDestroy(&isrow_d)); 3341 3342 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3343 PetscCall(ISDestroy(&iscol_d)); 3344 3345 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3346 PetscCall(ISDestroy(&iscol_o)); 3347 } 3348 PetscFunctionReturn(PETSC_SUCCESS); 3349 } 3350 3351 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3352 { 3353 IS iscol_local = NULL, isrow_d; 3354 PetscInt csize; 3355 PetscInt n, i, j, start, end; 3356 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3357 MPI_Comm comm; 3358 3359 PetscFunctionBegin; 3360 /* If isrow has same processor distribution as mat, 3361 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3362 if (call == MAT_REUSE_MATRIX) { 3363 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3364 if (isrow_d) { 3365 sameRowDist = PETSC_TRUE; 3366 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3367 } else { 3368 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3369 if (iscol_local) { 3370 sameRowDist = PETSC_TRUE; 3371 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3372 } 3373 } 3374 } else { 3375 /* Check if isrow has same processor distribution as mat */ 3376 sameDist[0] = PETSC_FALSE; 3377 PetscCall(ISGetLocalSize(isrow, &n)); 3378 if (!n) { 3379 sameDist[0] = PETSC_TRUE; 3380 } else { 3381 PetscCall(ISGetMinMax(isrow, &i, &j)); 3382 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3383 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3384 } 3385 3386 /* Check if iscol has same processor distribution as mat */ 3387 sameDist[1] = PETSC_FALSE; 3388 PetscCall(ISGetLocalSize(iscol, &n)); 3389 if (!n) { 3390 sameDist[1] 
= PETSC_TRUE; 3391 } else { 3392 PetscCall(ISGetMinMax(iscol, &i, &j)); 3393 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3394 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3395 } 3396 3397 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3398 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3399 sameRowDist = tsameDist[0]; 3400 } 3401 3402 if (sameRowDist) { 3403 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3404 /* isrow and iscol have same processor distribution as mat */ 3405 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3406 PetscFunctionReturn(PETSC_SUCCESS); 3407 } else { /* sameRowDist */ 3408 /* isrow has same processor distribution as mat */ 3409 if (call == MAT_INITIAL_MATRIX) { 3410 PetscBool sorted; 3411 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3412 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3413 PetscCall(ISGetSize(iscol, &i)); 3414 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3415 3416 PetscCall(ISSorted(iscol_local, &sorted)); 3417 if (sorted) { 3418 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3419 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 } else { /* call == MAT_REUSE_MATRIX */ 3423 IS iscol_sub; 3424 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3425 if (iscol_sub) { 3426 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3427 PetscFunctionReturn(PETSC_SUCCESS); 3428 } 3429 } 3430 } 3431 } 3432 3433 /* General case: iscol -> iscol_local which has global size of iscol */ 3434 if (call == MAT_REUSE_MATRIX) { 3435 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3436 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3437 } else { 3438 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3439 } 3440 3441 PetscCall(ISGetLocalSize(iscol, &csize)); 3442 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3443 3444 if (call == MAT_INITIAL_MATRIX) { 3445 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3446 PetscCall(ISDestroy(&iscol_local)); 3447 } 3448 PetscFunctionReturn(PETSC_SUCCESS); 3449 } 3450 3451 /*@C 3452 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3453 and "off-diagonal" part of the matrix in CSR format. 3454 3455 Collective 3456 3457 Input Parameters: 3458 + comm - MPI communicator 3459 . A - "diagonal" portion of matrix 3460 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3461 - garray - global index of `B` columns 3462 3463 Output Parameter: 3464 . mat - the matrix, with input `A` as its local diagonal matrix 3465 3466 Level: advanced 3467 3468 Notes: 3469 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3470 3471 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 
3472 3473 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3474 @*/ 3475 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3476 { 3477 Mat_MPIAIJ *maij; 3478 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3479 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3480 const PetscScalar *oa; 3481 Mat Bnew; 3482 PetscInt m, n, N; 3483 MatType mpi_mat_type; 3484 3485 PetscFunctionBegin; 3486 PetscCall(MatCreate(comm, mat)); 3487 PetscCall(MatGetSize(A, &m, &n)); 3488 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3489 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3490 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3491 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3492 3493 /* Get global columns of mat */ 3494 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3495 3496 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3497 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3498 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3499 PetscCall(MatSetType(*mat, mpi_mat_type)); 3500 3501 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3502 maij = (Mat_MPIAIJ *)(*mat)->data; 3503 3504 (*mat)->preallocated = PETSC_TRUE; 3505 3506 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3507 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3508 3509 /* Set A as diagonal portion of *mat */ 3510 maij->A = A; 3511 3512 nz = oi[m]; 3513 for (i = 0; i < nz; i++) { 3514 col = oj[i]; 3515 oj[i] = garray[col]; 3516 } 3517 3518 /* Set Bnew as off-diagonal portion of *mat */ 3519 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3520 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3521 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3522 bnew = (Mat_SeqAIJ *)Bnew->data; 3523 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3524 maij->B = Bnew; 3525 3526 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3527 3528 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3529 b->free_a = PETSC_FALSE; 3530 b->free_ij = PETSC_FALSE; 3531 PetscCall(MatDestroy(&B)); 3532 3533 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3534 bnew->free_a = PETSC_TRUE; 3535 bnew->free_ij = PETSC_TRUE; 3536 3537 /* condense columns of maij->B */ 3538 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3539 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3540 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3541 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3542 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3543 PetscFunctionReturn(PETSC_SUCCESS); 3544 } 3545 3546 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3547 3548 PetscErrorCode 
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3549 { 3550 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3551 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3552 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3553 Mat M, Msub, B = a->B; 3554 MatScalar *aa; 3555 Mat_SeqAIJ *aij; 3556 PetscInt *garray = a->garray, *colsub, Ncols; 3557 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3558 IS iscol_sub, iscmap; 3559 const PetscInt *is_idx, *cmap; 3560 PetscBool allcolumns = PETSC_FALSE; 3561 MPI_Comm comm; 3562 3563 PetscFunctionBegin; 3564 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3565 if (call == MAT_REUSE_MATRIX) { 3566 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3567 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3568 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3569 3570 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3571 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3572 3573 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3574 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3575 3576 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3577 3578 } else { /* call == MAT_INITIAL_MATRIX) */ 3579 PetscBool flg; 3580 3581 PetscCall(ISGetLocalSize(iscol, &n)); 3582 PetscCall(ISGetSize(iscol, &Ncols)); 3583 3584 /* (1) iscol -> nonscalable iscol_local */ 3585 /* Check for special case: each processor gets entire matrix columns */ 3586 PetscCall(ISIdentity(iscol_local, &flg)); 3587 if (flg && n == mat->cmap->N) allcolumns = 
PETSC_TRUE; 3588 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3589 if (allcolumns) { 3590 iscol_sub = iscol_local; 3591 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3592 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3593 3594 } else { 3595 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3596 PetscInt *idx, *cmap1, k; 3597 PetscCall(PetscMalloc1(Ncols, &idx)); 3598 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3599 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3600 count = 0; 3601 k = 0; 3602 for (i = 0; i < Ncols; i++) { 3603 j = is_idx[i]; 3604 if (j >= cstart && j < cend) { 3605 /* diagonal part of mat */ 3606 idx[count] = j; 3607 cmap1[count++] = i; /* column index in submat */ 3608 } else if (Bn) { 3609 /* off-diagonal part of mat */ 3610 if (j == garray[k]) { 3611 idx[count] = j; 3612 cmap1[count++] = i; /* column index in submat */ 3613 } else if (j > garray[k]) { 3614 while (j > garray[k] && k < Bn - 1) k++; 3615 if (j == garray[k]) { 3616 idx[count] = j; 3617 cmap1[count++] = i; /* column index in submat */ 3618 } 3619 } 3620 } 3621 } 3622 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3623 3624 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3625 PetscCall(ISGetBlockSize(iscol, &cbs)); 3626 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3627 3628 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3629 } 3630 3631 /* (3) Create sequential Msub */ 3632 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3633 } 3634 3635 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3636 aij = (Mat_SeqAIJ *)(Msub)->data; 3637 ii = aij->i; 3638 PetscCall(ISGetIndices(iscmap, &cmap)); 3639 3640 /* 3641 m - number of local rows 3642 Ncols 
- number of columns (same on all processors) 3643 rstart - first row in new global matrix generated 3644 */ 3645 PetscCall(MatGetSize(Msub, &m, NULL)); 3646 3647 if (call == MAT_INITIAL_MATRIX) { 3648 /* (4) Create parallel newmat */ 3649 PetscMPIInt rank, size; 3650 PetscInt csize; 3651 3652 PetscCallMPI(MPI_Comm_size(comm, &size)); 3653 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3654 3655 /* 3656 Determine the number of non-zeros in the diagonal and off-diagonal 3657 portions of the matrix in order to do correct preallocation 3658 */ 3659 3660 /* first get start and end of "diagonal" columns */ 3661 PetscCall(ISGetLocalSize(iscol, &csize)); 3662 if (csize == PETSC_DECIDE) { 3663 PetscCall(ISGetSize(isrow, &mglobal)); 3664 if (mglobal == Ncols) { /* square matrix */ 3665 nlocal = m; 3666 } else { 3667 nlocal = Ncols / size + ((Ncols % size) > rank); 3668 } 3669 } else { 3670 nlocal = csize; 3671 } 3672 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3673 rstart = rend - nlocal; 3674 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3675 3676 /* next, compute all the lengths */ 3677 jj = aij->j; 3678 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3679 olens = dlens + m; 3680 for (i = 0; i < m; i++) { 3681 jend = ii[i + 1] - ii[i]; 3682 olen = 0; 3683 dlen = 0; 3684 for (j = 0; j < jend; j++) { 3685 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3686 else dlen++; 3687 jj++; 3688 } 3689 olens[i] = olen; 3690 dlens[i] = dlen; 3691 } 3692 3693 PetscCall(ISGetBlockSize(isrow, &bs)); 3694 PetscCall(ISGetBlockSize(iscol, &cbs)); 3695 3696 PetscCall(MatCreate(comm, &M)); 3697 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3698 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3699 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3700 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3701 
PetscCall(PetscFree(dlens)); 3702 3703 } else { /* call == MAT_REUSE_MATRIX */ 3704 M = *newmat; 3705 PetscCall(MatGetLocalSize(M, &i, NULL)); 3706 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3707 PetscCall(MatZeroEntries(M)); 3708 /* 3709 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3710 rather than the slower MatSetValues(). 3711 */ 3712 M->was_assembled = PETSC_TRUE; 3713 M->assembled = PETSC_FALSE; 3714 } 3715 3716 /* (5) Set values of Msub to *newmat */ 3717 PetscCall(PetscMalloc1(count, &colsub)); 3718 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3719 3720 jj = aij->j; 3721 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3722 for (i = 0; i < m; i++) { 3723 row = rstart + i; 3724 nz = ii[i + 1] - ii[i]; 3725 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3726 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3727 jj += nz; 3728 aa += nz; 3729 } 3730 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3731 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3732 3733 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3734 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3735 3736 PetscCall(PetscFree(colsub)); 3737 3738 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3739 if (call == MAT_INITIAL_MATRIX) { 3740 *newmat = M; 3741 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub)); 3742 PetscCall(MatDestroy(&Msub)); 3743 3744 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub)); 3745 PetscCall(ISDestroy(&iscol_sub)); 3746 3747 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap)); 3748 PetscCall(ISDestroy(&iscmap)); 3749 3750 if (iscol_local) { 3751 PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local)); 3752 
PetscCall(ISDestroy(&iscol_local)); 3753 } 3754 } 3755 PetscFunctionReturn(PETSC_SUCCESS); 3756 } 3757 3758 /* 3759 Not great since it makes two copies of the submatrix, first an SeqAIJ 3760 in local and then by concatenating the local matrices the end result. 3761 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3762 3763 This requires a sequential iscol with all indices. 3764 */ 3765 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3766 { 3767 PetscMPIInt rank, size; 3768 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3769 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3770 Mat M, Mreuse; 3771 MatScalar *aa, *vwork; 3772 MPI_Comm comm; 3773 Mat_SeqAIJ *aij; 3774 PetscBool colflag, allcolumns = PETSC_FALSE; 3775 3776 PetscFunctionBegin; 3777 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3778 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3779 PetscCallMPI(MPI_Comm_size(comm, &size)); 3780 3781 /* Check for special case: each processor gets entire matrix columns */ 3782 PetscCall(ISIdentity(iscol, &colflag)); 3783 PetscCall(ISGetLocalSize(iscol, &n)); 3784 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3785 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3786 3787 if (call == MAT_REUSE_MATRIX) { 3788 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3789 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3790 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3791 } else { 3792 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3793 } 3794 3795 /* 3796 m - number of local rows 3797 n - number of columns (same on 
all processors) 3798 rstart - first row in new global matrix generated 3799 */ 3800 PetscCall(MatGetSize(Mreuse, &m, &n)); 3801 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3802 if (call == MAT_INITIAL_MATRIX) { 3803 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3804 ii = aij->i; 3805 jj = aij->j; 3806 3807 /* 3808 Determine the number of non-zeros in the diagonal and off-diagonal 3809 portions of the matrix in order to do correct preallocation 3810 */ 3811 3812 /* first get start and end of "diagonal" columns */ 3813 if (csize == PETSC_DECIDE) { 3814 PetscCall(ISGetSize(isrow, &mglobal)); 3815 if (mglobal == n) { /* square matrix */ 3816 nlocal = m; 3817 } else { 3818 nlocal = n / size + ((n % size) > rank); 3819 } 3820 } else { 3821 nlocal = csize; 3822 } 3823 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3824 rstart = rend - nlocal; 3825 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3826 3827 /* next, compute all the lengths */ 3828 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3829 olens = dlens + m; 3830 for (i = 0; i < m; i++) { 3831 jend = ii[i + 1] - ii[i]; 3832 olen = 0; 3833 dlen = 0; 3834 for (j = 0; j < jend; j++) { 3835 if (*jj < rstart || *jj >= rend) olen++; 3836 else dlen++; 3837 jj++; 3838 } 3839 olens[i] = olen; 3840 dlens[i] = dlen; 3841 } 3842 PetscCall(MatCreate(comm, &M)); 3843 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3844 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3845 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3846 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3847 PetscCall(PetscFree(dlens)); 3848 } else { 3849 PetscInt ml, nl; 3850 3851 M = *newmat; 3852 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3853 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3854 PetscCall(MatZeroEntries(M)); 3855 /* 
3856 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3857 rather than the slower MatSetValues(). 3858 */ 3859 M->was_assembled = PETSC_TRUE; 3860 M->assembled = PETSC_FALSE; 3861 } 3862 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3863 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3864 ii = aij->i; 3865 jj = aij->j; 3866 3867 /* trigger copy to CPU if needed */ 3868 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3869 for (i = 0; i < m; i++) { 3870 row = rstart + i; 3871 nz = ii[i + 1] - ii[i]; 3872 cwork = jj; 3873 jj += nz; 3874 vwork = aa; 3875 aa += nz; 3876 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3877 } 3878 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3879 3880 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3881 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3882 *newmat = M; 3883 3884 /* save submatrix used in processor for next request */ 3885 if (call == MAT_INITIAL_MATRIX) { 3886 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3887 PetscCall(MatDestroy(&Mreuse)); 3888 } 3889 PetscFunctionReturn(PETSC_SUCCESS); 3890 } 3891 3892 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3893 { 3894 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3895 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3896 const PetscInt *JJ; 3897 PetscBool nooffprocentries; 3898 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3899 3900 PetscFunctionBegin; 3901 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3902 3903 PetscCall(PetscLayoutSetUp(B->rmap)); 3904 PetscCall(PetscLayoutSetUp(B->cmap)); 3905 m = B->rmap->n; 3906 cstart = B->cmap->rstart; 3907 cend = B->cmap->rend; 3908 rstart = B->rmap->rstart; 3909 3910 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3911 3912 if (PetscDefined(USE_DEBUG)) { 
3913 for (i = 0; i < m; i++) { 3914 nnz = Ii[i + 1] - Ii[i]; 3915 JJ = J + Ii[i]; 3916 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3917 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3918 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3919 } 3920 } 3921 3922 for (i = 0; i < m; i++) { 3923 nnz = Ii[i + 1] - Ii[i]; 3924 JJ = J + Ii[i]; 3925 nnz_max = PetscMax(nnz_max, nnz); 3926 d = 0; 3927 for (j = 0; j < nnz; j++) { 3928 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3929 } 3930 d_nnz[i] = d; 3931 o_nnz[i] = nnz - d; 3932 } 3933 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3934 PetscCall(PetscFree2(d_nnz, o_nnz)); 3935 3936 for (i = 0; i < m; i++) { 3937 ii = i + rstart; 3938 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? 
v + Ii[i] : NULL, INSERT_VALUES)); 3939 } 3940 nooffprocentries = B->nooffprocentries; 3941 B->nooffprocentries = PETSC_TRUE; 3942 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3943 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3944 B->nooffprocentries = nooffprocentries; 3945 3946 /* count number of entries below block diagonal */ 3947 PetscCall(PetscFree(Aij->ld)); 3948 PetscCall(PetscCalloc1(m, &ld)); 3949 Aij->ld = ld; 3950 for (i = 0; i < m; i++) { 3951 nnz = Ii[i + 1] - Ii[i]; 3952 j = 0; 3953 while (j < nnz && J[j] < cstart) j++; 3954 ld[i] = j; 3955 J += nnz; 3956 } 3957 3958 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3959 PetscFunctionReturn(PETSC_SUCCESS); 3960 } 3961 3962 /*@ 3963 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3964 (the default parallel PETSc format). 3965 3966 Collective 3967 3968 Input Parameters: 3969 + B - the matrix 3970 . i - the indices into j for the start of each local row (starts with zero) 3971 . j - the column indices for each local row (starts with zero) 3972 - v - optional values in the matrix 3973 3974 Level: developer 3975 3976 Notes: 3977 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3978 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3979 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3980 3981 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3982 3983 The format which is used for the sparse matrix input, is equivalent to a 3984 row-major ordering.. 
i.e for the following matrix, the input data expected is 3985 as shown 3986 3987 .vb 3988 1 0 0 3989 2 0 3 P0 3990 ------- 3991 4 5 6 P1 3992 3993 Process0 [P0] rows_owned=[0,1] 3994 i = {0,1,3} [size = nrow+1 = 2+1] 3995 j = {0,0,2} [size = 3] 3996 v = {1,2,3} [size = 3] 3997 3998 Process1 [P1] rows_owned=[2] 3999 i = {0,3} [size = nrow+1 = 1+1] 4000 j = {0,1,2} [size = 3] 4001 v = {4,5,6} [size = 3] 4002 .ve 4003 4004 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, 4005 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()` 4006 @*/ 4007 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4008 { 4009 PetscFunctionBegin; 4010 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4011 PetscFunctionReturn(PETSC_SUCCESS); 4012 } 4013 4014 /*@C 4015 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4016 (the default parallel PETSc format). For good matrix assembly performance 4017 the user should preallocate the matrix storage by setting the parameters 4018 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4019 performance can be increased by more than a factor of 50. 4020 4021 Collective 4022 4023 Input Parameters: 4024 + B - the matrix 4025 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4026 (same value is used for all local rows) 4027 . d_nnz - array containing the number of nonzeros in the various rows of the 4028 DIAGONAL portion of the local submatrix (possibly different for each row) 4029 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4030 The size of this array is equal to the number of local rows, i.e 'm'. 
4031 For matrices that will be factored, you must leave room for (and set) 4032 the diagonal entry even if it is zero. 4033 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4034 submatrix (same value is used for all local rows). 4035 - o_nnz - array containing the number of nonzeros in the various rows of the 4036 OFF-DIAGONAL portion of the local submatrix (possibly different for 4037 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4038 structure. The size of this array is equal to the number 4039 of local rows, i.e 'm'. 4040 4041 Usage: 4042 Consider the following 8x8 matrix with 34 non-zero values, that is 4043 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4044 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4045 as follows 4046 4047 .vb 4048 1 2 0 | 0 3 0 | 0 4 4049 Proc0 0 5 6 | 7 0 0 | 8 0 4050 9 0 10 | 11 0 0 | 12 0 4051 ------------------------------------- 4052 13 0 14 | 15 16 17 | 0 0 4053 Proc1 0 18 0 | 19 20 21 | 0 0 4054 0 0 0 | 22 23 0 | 24 0 4055 ------------------------------------- 4056 Proc2 25 26 27 | 0 0 28 | 29 0 4057 30 0 0 | 31 32 33 | 0 34 4058 .ve 4059 4060 This can be represented as a collection of submatrices as 4061 .vb 4062 A B C 4063 D E F 4064 G H I 4065 .ve 4066 4067 Where the submatrices A,B,C are owned by proc0, D,E,F are 4068 owned by proc1, G,H,I are owned by proc2. 4069 4070 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4071 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4072 The 'M','N' parameters are 8,8, and have the same values on all procs. 4073 4074 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4075 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4076 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4077 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4078 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4079 matrix, ans [DF] as another `MATSEQAIJ` matrix. 4080 4081 When d_nz, o_nz parameters are specified, d_nz storage elements are 4082 allocated for every row of the local diagonal submatrix, and o_nz 4083 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4084 One way to choose d_nz and o_nz is to use the max nonzerors per local 4085 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4086 In this case, the values of d_nz,o_nz are 4087 .vb 4088 proc0 dnz = 2, o_nz = 2 4089 proc1 dnz = 3, o_nz = 2 4090 proc2 dnz = 1, o_nz = 4 4091 .ve 4092 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4093 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4094 for proc3. i.e we are using 12+15+10=37 storage locations to store 4095 34 values. 4096 4097 When d_nnz, o_nnz parameters are specified, the storage is specified 4098 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4099 In the above case the values for d_nnz,o_nnz are 4100 .vb 4101 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4102 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4103 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4104 .ve 4105 Here the space allocated is sum of all the above values i.e 34, and 4106 hence pre-allocation is perfect. 4107 4108 Level: intermediate 4109 4110 Notes: 4111 If the *_nnz parameter is given then the *_nz parameter is ignored 4112 4113 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4114 storage. The stored row and column indices begin with zero. 4115 See [Sparse Matrices](sec_matsparse) for details. 4116 4117 The parallel matrix is partitioned such that the first m0 rows belong to 4118 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4119 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4120 4121 The DIAGONAL portion of the local submatrix of a processor can be defined 4122 as the submatrix which is obtained by extraction the part corresponding to 4123 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4124 first row that belongs to the processor, r2 is the last row belonging to 4125 the this processor, and c1-c2 is range of indices of the local part of a 4126 vector suitable for applying the matrix to. This is an mxn matrix. In the 4127 common case of a square matrix, the row and column ranges are the same and 4128 the DIAGONAL part is also square. The remaining portion of the local 4129 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4130 4131 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4132 4133 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4134 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4135 You can also run with the option `-info` and look for messages with the string 4136 malloc in them to see if additional memory allocation was needed. 4137 4138 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4139 `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()` 4140 @*/ 4141 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4142 { 4143 PetscFunctionBegin; 4144 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4145 PetscValidType(B, 1); 4146 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4147 PetscFunctionReturn(PETSC_SUCCESS); 4148 } 4149 4150 /*@ 4151 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4152 CSR format for the local rows. 
4153 4154 Collective 4155 4156 Input Parameters: 4157 + comm - MPI communicator 4158 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4159 . n - This value should be the same as the local size used in creating the 4160 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4161 calculated if N is given) For square matrices n is almost always m. 4162 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4163 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4164 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4165 . j - column indices 4166 - a - optional matrix values 4167 4168 Output Parameter: 4169 . mat - the matrix 4170 4171 Level: intermediate 4172 4173 Notes: 4174 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4175 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4176 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4177 4178 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4179 4180 The format which is used for the sparse matrix input, is equivalent to a 4181 row-major ordering.. 
i.e for the following matrix, the input data expected is 4182 as shown 4183 4184 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4185 .vb 4186 1 0 0 4187 2 0 3 P0 4188 ------- 4189 4 5 6 P1 4190 4191 Process0 [P0] rows_owned=[0,1] 4192 i = {0,1,3} [size = nrow+1 = 2+1] 4193 j = {0,0,2} [size = 3] 4194 v = {1,2,3} [size = 3] 4195 4196 Process1 [P1] rows_owned=[2] 4197 i = {0,3} [size = nrow+1 = 1+1] 4198 j = {0,1,2} [size = 3] 4199 v = {4,5,6} [size = 3] 4200 .ve 4201 4202 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4203 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4204 @*/ 4205 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4206 { 4207 PetscFunctionBegin; 4208 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4209 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4210 PetscCall(MatCreate(comm, mat)); 4211 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4212 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4213 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4214 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4215 PetscFunctionReturn(PETSC_SUCCESS); 4216 } 4217 4218 /*@ 4219 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4220 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4221 from `MatCreateMPIAIJWithArrays()` 4222 4223 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4224 4225 Collective 4226 4227 Input Parameters: 4228 + mat - the matrix 4229 . 
m - number of local rows (Cannot be `PETSC_DECIDE`) 4230 . n - This value should be the same as the local size used in creating the 4231 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4232 calculated if N is given) For square matrices n is almost always m. 4233 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4234 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4235 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4236 . J - column indices 4237 - v - matrix values 4238 4239 Level: deprecated 4240 4241 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4242 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4243 @*/ 4244 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4245 { 4246 PetscInt nnz, i; 4247 PetscBool nooffprocentries; 4248 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4249 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4250 PetscScalar *ad, *ao; 4251 PetscInt ldi, Iii, md; 4252 const PetscInt *Adi = Ad->i; 4253 PetscInt *ld = Aij->ld; 4254 4255 PetscFunctionBegin; 4256 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4257 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4258 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4259 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to 
MatUpdateMPIAIJWithArrays()"); 4260 4261 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4262 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4263 4264 for (i = 0; i < m; i++) { 4265 nnz = Ii[i + 1] - Ii[i]; 4266 Iii = Ii[i]; 4267 ldi = ld[i]; 4268 md = Adi[i + 1] - Adi[i]; 4269 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4270 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4271 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4272 ad += md; 4273 ao += nnz - md; 4274 } 4275 nooffprocentries = mat->nooffprocentries; 4276 mat->nooffprocentries = PETSC_TRUE; 4277 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4278 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4279 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4280 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4281 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4282 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4283 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4284 mat->nooffprocentries = nooffprocentries; 4285 PetscFunctionReturn(PETSC_SUCCESS); 4286 } 4287 4288 /*@ 4289 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4290 4291 Collective 4292 4293 Input Parameters: 4294 + mat - the matrix 4295 - v - matrix values, stored by row 4296 4297 Level: intermediate 4298 4299 Note: 4300 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4301 4302 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4303 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4304 @*/ 4305 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4306 { 4307 PetscInt nnz, i, m; 4308 PetscBool nooffprocentries; 4309 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4310 
Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4311 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4312 PetscScalar *ad, *ao; 4313 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4314 PetscInt ldi, Iii, md; 4315 PetscInt *ld = Aij->ld; 4316 4317 PetscFunctionBegin; 4318 m = mat->rmap->n; 4319 4320 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4321 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4322 Iii = 0; 4323 for (i = 0; i < m; i++) { 4324 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4325 ldi = ld[i]; 4326 md = Adi[i + 1] - Adi[i]; 4327 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4328 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4329 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4330 ad += md; 4331 ao += nnz - md; 4332 Iii += nnz; 4333 } 4334 nooffprocentries = mat->nooffprocentries; 4335 mat->nooffprocentries = PETSC_TRUE; 4336 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4337 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4338 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4339 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4340 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4341 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4342 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4343 mat->nooffprocentries = nooffprocentries; 4344 PetscFunctionReturn(PETSC_SUCCESS); 4345 } 4346 4347 /*@C 4348 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4349 (the default parallel PETSc format). For good matrix assembly performance 4350 the user should preallocate the matrix storage by setting the parameters 4351 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4352 4353 Collective 4354 4355 Input Parameters: 4356 + comm - MPI communicator 4357 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4358 This value should be the same as the local size used in creating the 4359 y vector for the matrix-vector product y = Ax. 4360 . 
n - This value should be the same as the local size used in creating the 4361 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4362 calculated if N is given) For square matrices n is almost always m. 4363 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4364 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4365 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4366 (same value is used for all local rows) 4367 . d_nnz - array containing the number of nonzeros in the various rows of the 4368 DIAGONAL portion of the local submatrix (possibly different for each row) 4369 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4370 The size of this array is equal to the number of local rows, i.e 'm'. 4371 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4372 submatrix (same value is used for all local rows). 4373 - o_nnz - array containing the number of nonzeros in the various rows of the 4374 OFF-DIAGONAL portion of the local submatrix (possibly different for 4375 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4376 structure. The size of this array is equal to the number 4377 of local rows, i.e 'm'. 4378 4379 Output Parameter: 4380 . A - the matrix 4381 4382 Options Database Keys: 4383 + -mat_no_inode - Do not use inodes 4384 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4385 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4386 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4387 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 

  Level: intermediate

  Notes:
  It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first m0 rows belong to process 0, the next m1 rows belong to
  process 1, the next m2 rows belong to process 2 etc.. where
  m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitutes the OFF-DIAGONAL portion. The example below better
  illustrates this concept.
4426 4427 For a square global matrix we define each processor's diagonal portion 4428 to be its local rows and the corresponding columns (a square submatrix); 4429 each processor's off-diagonal portion encompasses the remainder of the 4430 local matrix (a rectangular submatrix). 4431 4432 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4433 4434 When calling this routine with a single process communicator, a matrix of 4435 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4436 type of communicator, use the construction mechanism 4437 .vb 4438 MatCreate(...,&A); 4439 MatSetType(A,MATMPIAIJ); 4440 MatSetSizes(A, m,n,M,N); 4441 MatMPIAIJSetPreallocation(A,...); 4442 .ve 4443 4444 By default, this format uses inodes (identical nodes) when possible. 4445 We search for consecutive rows with the same nonzero structure, thereby 4446 reusing matrix information to achieve increased efficiency. 4447 4448 Usage: 4449 Consider the following 8x8 matrix with 34 non-zero values, that is 4450 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4451 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4452 as follows 4453 4454 .vb 4455 1 2 0 | 0 3 0 | 0 4 4456 Proc0 0 5 6 | 7 0 0 | 8 0 4457 9 0 10 | 11 0 0 | 12 0 4458 ------------------------------------- 4459 13 0 14 | 15 16 17 | 0 0 4460 Proc1 0 18 0 | 19 20 21 | 0 0 4461 0 0 0 | 22 23 0 | 24 0 4462 ------------------------------------- 4463 Proc2 25 26 27 | 0 0 28 | 29 0 4464 30 0 0 | 31 32 33 | 0 34 4465 .ve 4466 4467 This can be represented as a collection of submatrices as 4468 4469 .vb 4470 A B C 4471 D E F 4472 G H I 4473 .ve 4474 4475 Where the submatrices A,B,C are owned by proc0, D,E,F are 4476 owned by proc1, G,H,I are owned by proc2. 4477 4478 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4479 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.
4515 4516 .seealso: [](chapter_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4517 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4518 @*/ 4519 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4520 { 4521 PetscMPIInt size; 4522 4523 PetscFunctionBegin; 4524 PetscCall(MatCreate(comm, A)); 4525 PetscCall(MatSetSizes(*A, m, n, M, N)); 4526 PetscCallMPI(MPI_Comm_size(comm, &size)); 4527 if (size > 1) { 4528 PetscCall(MatSetType(*A, MATMPIAIJ)); 4529 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4530 } else { 4531 PetscCall(MatSetType(*A, MATSEQAIJ)); 4532 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4533 } 4534 PetscFunctionReturn(PETSC_SUCCESS); 4535 } 4536 4537 /*MC 4538 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4539 4540 Synopsis: 4541 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4542 4543 Not Collective 4544 4545 Input Parameter: 4546 . A - the `MATMPIAIJ` matrix 4547 4548 Output Parameters: 4549 + Ad - the diagonal portion of the matrix 4550 . Ao - the off diagonal portion of the matrix 4551 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4552 - ierr - error code 4553 4554 Level: advanced 4555 4556 Note: 4557 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4558 4559 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4560 M*/ 4561 4562 /*MC 4563 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4564 4565 Synopsis: 4566 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4567 4568 Not Collective 4569 4570 Input Parameters: 4571 + A - the `MATMPIAIJ` matrix 4572 . Ad - the diagonal portion of the matrix 4573 . Ao - the off diagonal portion of the matrix 4574 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4575 - ierr - error code 4576 4577 Level: advanced 4578 4579 .seealso: [](chapter_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4580 M*/ 4581 4582 /*@C 4583 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4584 4585 Not Collective 4586 4587 Input Parameter: 4588 . A - The `MATMPIAIJ` matrix 4589 4590 Output Parameters: 4591 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4592 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4593 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4594 4595 Level: intermediate 4596 4597 Note: 4598 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4599 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4600 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4601 local column numbers to global column numbers in the original matrix. 4602 4603 Fortran Note: 4604 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4605 4606 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4607 @*/ 4608 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4609 { 4610 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4611 PetscBool flg; 4612 4613 PetscFunctionBegin; 4614 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4615 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4616 if (Ad) *Ad = a->A; 4617 if (Ao) *Ao = a->B; 4618 if (colmap) *colmap = a->garray; 4619 PetscFunctionReturn(PETSC_SUCCESS); 4620 } 4621 4622 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4623 { 4624 PetscInt m, N, i, rstart, nnz, Ii; 4625 PetscInt *indx; 4626 PetscScalar *values; 4627 MatType rootType; 4628 4629 PetscFunctionBegin; 4630 PetscCall(MatGetSize(inmat, &m, &N)); 4631 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4632 PetscInt *dnz, *onz, sum, bs, cbs; 4633 4634 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4635 /* Check sum(n) = N */ 4636 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4637 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4638 4639 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4640 rstart -= m; 4641 4642 MatPreallocateBegin(comm, m, n, dnz, onz); 4643 for (i = 0; i < m; i++) { 4644 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4645 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4646 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4647 } 4648 4649 PetscCall(MatCreate(comm, outmat)); 4650 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4651 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4652 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4653 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4654 PetscCall(MatSetType(*outmat, rootType)); 4655 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4656 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4657 MatPreallocateEnd(dnz, onz); 4658 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4659 } 4660 4661 /* numeric phase */ 4662 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4663 for (i = 0; i < m; i++) { 4664 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4665 Ii = i + rstart; 4666 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4667 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4668 } 4669 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4670 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4671 PetscFunctionReturn(PETSC_SUCCESS); 4672 } 4673 4674 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4675 { 4676 PetscMPIInt rank; 4677 PetscInt m, N, i, rstart, nnz; 4678 size_t len; 4679 const PetscInt *indx; 4680 PetscViewer out; 4681 char *name; 4682 Mat B; 4683 const PetscScalar *values; 4684 4685 PetscFunctionBegin; 4686 PetscCall(MatGetLocalSize(A, &m, NULL)); 4687 PetscCall(MatGetSize(A, NULL, &N)); 4688 /* Should this be the type of the diagonal block of A? 
*/ 4689 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4690 PetscCall(MatSetSizes(B, m, N, m, N)); 4691 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4692 PetscCall(MatSetType(B, MATSEQAIJ)); 4693 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4694 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4695 for (i = 0; i < m; i++) { 4696 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4697 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4698 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4699 } 4700 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4701 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4702 4703 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4704 PetscCall(PetscStrlen(outfile, &len)); 4705 PetscCall(PetscMalloc1(len + 6, &name)); 4706 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4707 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4708 PetscCall(PetscFree(name)); 4709 PetscCall(MatView(B, out)); 4710 PetscCall(PetscViewerDestroy(&out)); 4711 PetscCall(MatDestroy(&B)); 4712 PetscFunctionReturn(PETSC_SUCCESS); 4713 } 4714 4715 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4716 { 4717 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4718 4719 PetscFunctionBegin; 4720 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4721 PetscCall(PetscFree(merge->id_r)); 4722 PetscCall(PetscFree(merge->len_s)); 4723 PetscCall(PetscFree(merge->len_r)); 4724 PetscCall(PetscFree(merge->bi)); 4725 PetscCall(PetscFree(merge->bj)); 4726 PetscCall(PetscFree(merge->buf_ri[0])); 4727 PetscCall(PetscFree(merge->buf_ri)); 4728 PetscCall(PetscFree(merge->buf_rj[0])); 4729 PetscCall(PetscFree(merge->buf_rj)); 4730 PetscCall(PetscFree(merge->coi)); 4731 PetscCall(PetscFree(merge->coj)); 4732 PetscCall(PetscFree(merge->owners_co)); 4733 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4734 PetscCall(PetscFree(merge)); 4735 
PetscFunctionReturn(PETSC_SUCCESS); 4736 } 4737 4738 #include <../src/mat/utils/freespace.h> 4739 #include <petscbt.h> 4740 4741 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4742 { 4743 MPI_Comm comm; 4744 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4745 PetscMPIInt size, rank, taga, *len_s; 4746 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4747 PetscInt proc, m; 4748 PetscInt **buf_ri, **buf_rj; 4749 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4750 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4751 MPI_Request *s_waits, *r_waits; 4752 MPI_Status *status; 4753 const MatScalar *aa, *a_a; 4754 MatScalar **abuf_r, *ba_i; 4755 Mat_Merge_SeqsToMPI *merge; 4756 PetscContainer container; 4757 4758 PetscFunctionBegin; 4759 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4760 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4761 4762 PetscCallMPI(MPI_Comm_size(comm, &size)); 4763 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4764 4765 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4766 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4767 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4768 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4769 aa = a_a; 4770 4771 bi = merge->bi; 4772 bj = merge->bj; 4773 buf_ri = merge->buf_ri; 4774 buf_rj = merge->buf_rj; 4775 4776 PetscCall(PetscMalloc1(size, &status)); 4777 owners = merge->rowmap->range; 4778 len_s = merge->len_s; 4779 4780 /* send and recv matrix values */ 4781 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4782 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4783 4784 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4785 for (proc = 0, k = 0; proc < size; proc++) { 4786 if (!len_s[proc]) continue; 4787 i = 
owners[proc]; 4788 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4789 k++; 4790 } 4791 4792 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4793 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4794 PetscCall(PetscFree(status)); 4795 4796 PetscCall(PetscFree(s_waits)); 4797 PetscCall(PetscFree(r_waits)); 4798 4799 /* insert mat values of mpimat */ 4800 PetscCall(PetscMalloc1(N, &ba_i)); 4801 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4802 4803 for (k = 0; k < merge->nrecv; k++) { 4804 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4805 nrows = *(buf_ri_k[k]); 4806 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4807 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4808 } 4809 4810 /* set values of ba */ 4811 m = merge->rowmap->n; 4812 for (i = 0; i < m; i++) { 4813 arow = owners[rank] + i; 4814 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4815 bnzi = bi[i + 1] - bi[i]; 4816 PetscCall(PetscArrayzero(ba_i, bnzi)); 4817 4818 /* add local non-zero vals of this proc's seqmat into ba */ 4819 anzi = ai[arow + 1] - ai[arow]; 4820 aj = a->j + ai[arow]; 4821 aa = a_a + ai[arow]; 4822 nextaj = 0; 4823 for (j = 0; nextaj < anzi; j++) { 4824 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4825 ba_i[j] += aa[nextaj++]; 4826 } 4827 } 4828 4829 /* add received vals into ba */ 4830 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4831 /* i-th row */ 4832 if (i == *nextrow[k]) { 4833 anzi = *(nextai[k] + 1) - *nextai[k]; 4834 aj = buf_rj[k] + *(nextai[k]); 4835 aa = abuf_r[k] + *(nextai[k]); 4836 nextaj = 0; 4837 for (j = 0; nextaj < anzi; j++) { 4838 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4839 ba_i[j] += aa[nextaj++]; 4840 } 4841 } 4842 nextrow[k]++; 4843 nextai[k]++; 
4844 } 4845 } 4846 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4847 } 4848 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4849 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4850 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4851 4852 PetscCall(PetscFree(abuf_r[0])); 4853 PetscCall(PetscFree(abuf_r)); 4854 PetscCall(PetscFree(ba_i)); 4855 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4856 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4857 PetscFunctionReturn(PETSC_SUCCESS); 4858 } 4859 4860 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4861 { 4862 Mat B_mpi; 4863 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4864 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4865 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4866 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4867 PetscInt len, proc, *dnz, *onz, bs, cbs; 4868 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4869 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4870 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4871 MPI_Status *status; 4872 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4873 PetscBT lnkbt; 4874 Mat_Merge_SeqsToMPI *merge; 4875 PetscContainer container; 4876 4877 PetscFunctionBegin; 4878 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4879 4880 /* make sure it is a PETSc comm */ 4881 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4882 PetscCallMPI(MPI_Comm_size(comm, &size)); 4883 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4884 4885 PetscCall(PetscNew(&merge)); 4886 PetscCall(PetscMalloc1(size, &status)); 4887 4888 /* determine row ownership */ 4889 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4890 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4891 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4892 
PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4893 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4894 PetscCall(PetscMalloc1(size, &len_si)); 4895 PetscCall(PetscMalloc1(size, &merge->len_s)); 4896 4897 m = merge->rowmap->n; 4898 owners = merge->rowmap->range; 4899 4900 /* determine the number of messages to send, their lengths */ 4901 len_s = merge->len_s; 4902 4903 len = 0; /* length of buf_si[] */ 4904 merge->nsend = 0; 4905 for (proc = 0; proc < size; proc++) { 4906 len_si[proc] = 0; 4907 if (proc == rank) { 4908 len_s[proc] = 0; 4909 } else { 4910 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4911 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4912 } 4913 if (len_s[proc]) { 4914 merge->nsend++; 4915 nrows = 0; 4916 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4917 if (ai[i + 1] > ai[i]) nrows++; 4918 } 4919 len_si[proc] = 2 * (nrows + 1); 4920 len += len_si[proc]; 4921 } 4922 } 4923 4924 /* determine the number and length of messages to receive for ij-structure */ 4925 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4926 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4927 4928 /* post the Irecv of j-structure */ 4929 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4930 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4931 4932 /* post the Isend of j-structure */ 4933 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4934 4935 for (proc = 0, k = 0; proc < size; proc++) { 4936 if (!len_s[proc]) continue; 4937 i = owners[proc]; 4938 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4939 k++; 4940 } 4941 4942 /* receives and sends of j-structure are complete */ 4943 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4944 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4945 4946 /* send and recv i-structure */ 4947 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4948 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4949 4950 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4951 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4952 for (proc = 0, k = 0; proc < size; proc++) { 4953 if (!len_s[proc]) continue; 4954 /* form outgoing message for i-structure: 4955 buf_si[0]: nrows to be sent 4956 [1:nrows]: row index (global) 4957 [nrows+1:2*nrows+1]: i-structure index 4958 */ 4959 nrows = len_si[proc] / 2 - 1; 4960 buf_si_i = buf_si + nrows + 1; 4961 buf_si[0] = nrows; 4962 buf_si_i[0] = 0; 4963 nrows = 0; 4964 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4965 anzi = ai[i + 1] - ai[i]; 4966 if (anzi) { 4967 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4968 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4969 nrows++; 4970 } 4971 } 4972 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4973 k++; 4974 buf_si += len_si[proc]; 4975 } 4976 4977 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4978 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4979 4980 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4981 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4982 4983 PetscCall(PetscFree(len_si)); 4984 PetscCall(PetscFree(len_ri)); 4985 PetscCall(PetscFree(rj_waits)); 4986 PetscCall(PetscFree2(si_waits, sj_waits)); 4987 PetscCall(PetscFree(ri_waits)); 4988 PetscCall(PetscFree(buf_s)); 4989 PetscCall(PetscFree(status)); 4990 4991 /* compute a local seq matrix in each processor */ 4992 /* allocate bi array and free space for accumulating nonzero column info */ 4993 
PetscCall(PetscMalloc1(m + 1, &bi)); 4994 bi[0] = 0; 4995 4996 /* create and initialize a linked list */ 4997 nlnk = N + 1; 4998 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4999 5000 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5001 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5002 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5003 5004 current_space = free_space; 5005 5006 /* determine symbolic info for each local row */ 5007 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5008 5009 for (k = 0; k < merge->nrecv; k++) { 5010 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5011 nrows = *buf_ri_k[k]; 5012 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5013 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5014 } 5015 5016 MatPreallocateBegin(comm, m, n, dnz, onz); 5017 len = 0; 5018 for (i = 0; i < m; i++) { 5019 bnzi = 0; 5020 /* add local non-zero cols of this proc's seqmat into lnk */ 5021 arow = owners[rank] + i; 5022 anzi = ai[arow + 1] - ai[arow]; 5023 aj = a->j + ai[arow]; 5024 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5025 bnzi += nlnk; 5026 /* add received col data into lnk */ 5027 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5028 if (i == *nextrow[k]) { /* i-th row */ 5029 anzi = *(nextai[k] + 1) - *nextai[k]; 5030 aj = buf_rj[k] + *nextai[k]; 5031 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5032 bnzi += nlnk; 5033 nextrow[k]++; 5034 nextai[k]++; 5035 } 5036 } 5037 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5038 5039 /* if free space is not available, make more free space */ 5040 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5041 /* copy data into free space, then initialize lnk */ 5042 
PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5043 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5044 5045 current_space->array += bnzi; 5046 current_space->local_used += bnzi; 5047 current_space->local_remaining -= bnzi; 5048 5049 bi[i + 1] = bi[i] + bnzi; 5050 } 5051 5052 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5053 5054 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5055 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5056 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5057 5058 /* create symbolic parallel matrix B_mpi */ 5059 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5060 PetscCall(MatCreate(comm, &B_mpi)); 5061 if (n == PETSC_DECIDE) { 5062 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5063 } else { 5064 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5065 } 5066 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5067 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5068 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5069 MatPreallocateEnd(dnz, onz); 5070 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5071 5072 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5073 B_mpi->assembled = PETSC_FALSE; 5074 merge->bi = bi; 5075 merge->bj = bj; 5076 merge->buf_ri = buf_ri; 5077 merge->buf_rj = buf_rj; 5078 merge->coi = NULL; 5079 merge->coj = NULL; 5080 merge->owners_co = NULL; 5081 5082 PetscCall(PetscCommDestroy(&comm)); 5083 5084 /* attach the supporting struct to B_mpi for reuse */ 5085 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5086 PetscCall(PetscContainerSetPointer(container, merge)); 5087 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5088 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5089 PetscCall(PetscContainerDestroy(&container)); 5090 *mpimat = B_mpi; 5091 5092 
PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5093 PetscFunctionReturn(PETSC_SUCCESS); 5094 } 5095 5096 /*@C 5097 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5098 matrices from each processor 5099 5100 Collective 5101 5102 Input Parameters: 5103 + comm - the communicators the parallel matrix will live on 5104 . seqmat - the input sequential matrices 5105 . m - number of local rows (or `PETSC_DECIDE`) 5106 . n - number of local columns (or `PETSC_DECIDE`) 5107 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5108 5109 Output Parameter: 5110 . mpimat - the parallel matrix generated 5111 5112 Level: advanced 5113 5114 Note: 5115 The dimensions of the sequential matrix in each processor MUST be the same. 5116 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5117 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5118 5119 seealso: [](chapter_matrices), `Mat`, `MatCreateAIJ()` 5120 @*/ 5121 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5122 { 5123 PetscMPIInt size; 5124 5125 PetscFunctionBegin; 5126 PetscCallMPI(MPI_Comm_size(comm, &size)); 5127 if (size == 1) { 5128 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5129 if (scall == MAT_INITIAL_MATRIX) { 5130 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5131 } else { 5132 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5133 } 5134 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5135 PetscFunctionReturn(PETSC_SUCCESS); 5136 } 5137 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5138 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5139 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5140 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5141 PetscFunctionReturn(PETSC_SUCCESS); 5142 
} 5143 5144 /*@ 5145 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5146 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5147 with `MatGetSize()` 5148 5149 Not Collective 5150 5151 Input Parameters: 5152 + A - the matrix 5153 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5154 5155 Output Parameter: 5156 . A_loc - the local sequential matrix generated 5157 5158 Level: developer 5159 5160 Notes: 5161 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5162 5163 Destroy the matrix with `MatDestroy()` 5164 5165 .seealso: [](chapter_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5166 @*/ 5167 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5168 { 5169 PetscBool mpi; 5170 5171 PetscFunctionBegin; 5172 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5173 if (mpi) { 5174 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5175 } else { 5176 *A_loc = A; 5177 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5178 } 5179 PetscFunctionReturn(PETSC_SUCCESS); 5180 } 5181 5182 /*@ 5183 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5184 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5185 with `MatGetSize()` 5186 5187 Not Collective 5188 5189 Input Parameters: 5190 + A - the matrix 5191 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5192 5193 Output Parameter: 5194 . A_loc - the local sequential matrix generated 5195 5196 Level: developer 5197 5198 Notes: 5199 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5200 5201 When the communicator associated with `A` has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A`. 5202 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called. 5203 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5204 modify the values of the returned `A_loc`. 5205 5206 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5207 @*/ 5208 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5209 { 5210 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5211 Mat_SeqAIJ *mat, *a, *b; 5212 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5213 const PetscScalar *aa, *ba, *aav, *bav; 5214 PetscScalar *ca, *cam; 5215 PetscMPIInt size; 5216 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5217 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5218 PetscBool match; 5219 5220 PetscFunctionBegin; 5221 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5222 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5223 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5224 if (size == 1) { 5225 if (scall == MAT_INITIAL_MATRIX) { 5226 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5227 *A_loc = mpimat->A; 5228 } else if (scall == MAT_REUSE_MATRIX) { 5229 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5230 } 5231 PetscFunctionReturn(PETSC_SUCCESS); 5232 } 5233 5234 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5235 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5236 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5237 ai = a->i; 5238 aj = a->j; 5239 bi = b->i; 5240 bj = b->j; 5241 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5242 
PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5243 aa = aav; 5244 ba = bav; 5245 if (scall == MAT_INITIAL_MATRIX) { 5246 PetscCall(PetscMalloc1(1 + am, &ci)); 5247 ci[0] = 0; 5248 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5249 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5250 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5251 k = 0; 5252 for (i = 0; i < am; i++) { 5253 ncols_o = bi[i + 1] - bi[i]; 5254 ncols_d = ai[i + 1] - ai[i]; 5255 /* off-diagonal portion of A */ 5256 for (jo = 0; jo < ncols_o; jo++) { 5257 col = cmap[*bj]; 5258 if (col >= cstart) break; 5259 cj[k] = col; 5260 bj++; 5261 ca[k++] = *ba++; 5262 } 5263 /* diagonal portion of A */ 5264 for (j = 0; j < ncols_d; j++) { 5265 cj[k] = cstart + *aj++; 5266 ca[k++] = *aa++; 5267 } 5268 /* off-diagonal portion of A */ 5269 for (j = jo; j < ncols_o; j++) { 5270 cj[k] = cmap[*bj++]; 5271 ca[k++] = *ba++; 5272 } 5273 } 5274 /* put together the new matrix */ 5275 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5276 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5277 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5278 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5279 mat->free_a = PETSC_TRUE; 5280 mat->free_ij = PETSC_TRUE; 5281 mat->nonew = 0; 5282 } else if (scall == MAT_REUSE_MATRIX) { 5283 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5284 ci = mat->i; 5285 cj = mat->j; 5286 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5287 for (i = 0; i < am; i++) { 5288 /* off-diagonal portion of A */ 5289 ncols_o = bi[i + 1] - bi[i]; 5290 for (jo = 0; jo < ncols_o; jo++) { 5291 col = cmap[*bj]; 5292 if (col >= cstart) break; 5293 *cam++ = *ba++; 5294 bj++; 5295 } 5296 /* diagonal portion of A */ 5297 ncols_d = ai[i + 1] - ai[i]; 5298 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5299 /* off-diagonal portion of A */ 5300 for (j = jo; j < ncols_o; j++) { 5301 *cam++ = *ba++; 5302 bj++; 5303 } 5304 } 5305 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5306 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5307 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5308 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5309 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5310 PetscFunctionReturn(PETSC_SUCCESS); 5311 } 5312 5313 /*@ 5314 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5315 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and offdiagonal part 5316 5317 Not Collective 5318 5319 Input Parameters: 5320 + A - the matrix 5321 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5322 5323 Output Parameters: 5324 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5325 - A_loc - the local sequential matrix generated 5326 5327 Level: developer 5328 5329 Note: 5330 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5331 part, then those associated with the off diagonal part (in its local ordering) 5332 5333 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5334 @*/ 5335 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5336 { 5337 Mat Ao, Ad; 5338 const PetscInt *cmap; 5339 PetscMPIInt size; 5340 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5341 5342 PetscFunctionBegin; 5343 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5344 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5345 if (size == 1) { 5346 if (scall == MAT_INITIAL_MATRIX) { 5347 PetscCall(PetscObjectReference((PetscObject)Ad)); 5348 *A_loc = Ad; 5349 } else if (scall == MAT_REUSE_MATRIX) { 5350 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5351 } 5352 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5353 PetscFunctionReturn(PETSC_SUCCESS); 5354 } 5355 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5356 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5357 if (f) { 5358 PetscCall((*f)(A, scall, glob, A_loc)); 5359 } else { 5360 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5361 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5362 Mat_SeqAIJ *c; 5363 
PetscInt *ai = a->i, *aj = a->j; 5364 PetscInt *bi = b->i, *bj = b->j; 5365 PetscInt *ci, *cj; 5366 const PetscScalar *aa, *ba; 5367 PetscScalar *ca; 5368 PetscInt i, j, am, dn, on; 5369 5370 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5371 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5372 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5373 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5374 if (scall == MAT_INITIAL_MATRIX) { 5375 PetscInt k; 5376 PetscCall(PetscMalloc1(1 + am, &ci)); 5377 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5378 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5379 ci[0] = 0; 5380 for (i = 0, k = 0; i < am; i++) { 5381 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5382 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5383 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5384 /* diagonal portion of A */ 5385 for (j = 0; j < ncols_d; j++, k++) { 5386 cj[k] = *aj++; 5387 ca[k] = *aa++; 5388 } 5389 /* off-diagonal portion of A */ 5390 for (j = 0; j < ncols_o; j++, k++) { 5391 cj[k] = dn + *bj++; 5392 ca[k] = *ba++; 5393 } 5394 } 5395 /* put together the new matrix */ 5396 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5397 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5398 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5399 c = (Mat_SeqAIJ *)(*A_loc)->data; 5400 c->free_a = PETSC_TRUE; 5401 c->free_ij = PETSC_TRUE; 5402 c->nonew = 0; 5403 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5404 } else if (scall == MAT_REUSE_MATRIX) { 5405 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5406 for (i = 0; i < am; i++) { 5407 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5408 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5409 /* diagonal portion of A */ 5410 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5411 /* off-diagonal portion of A */ 5412 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5413 } 5414 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5415 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5416 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5417 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5418 if (glob) { 5419 PetscInt cst, *gidx; 5420 5421 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5422 PetscCall(PetscMalloc1(dn + on, &gidx)); 5423 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5424 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5425 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5426 } 5427 } 5428 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5429 PetscFunctionReturn(PETSC_SUCCESS); 5430 } 5431 5432 /*@C 5433 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5434 5435 Not Collective 5436 5437 Input Parameters: 5438 + A - the matrix 5439 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5440 - row, col - index sets of rows and columns to extract (or `NULL`) 5441 5442 Output Parameter: 5443 . 
A_loc - the local sequential matrix generated 5444 5445 Level: developer 5446 5447 .seealso: [](chapter_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5448 @*/ 5449 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5450 { 5451 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5452 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5453 IS isrowa, iscola; 5454 Mat *aloc; 5455 PetscBool match; 5456 5457 PetscFunctionBegin; 5458 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5459 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5460 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5461 if (!row) { 5462 start = A->rmap->rstart; 5463 end = A->rmap->rend; 5464 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5465 } else { 5466 isrowa = *row; 5467 } 5468 if (!col) { 5469 start = A->cmap->rstart; 5470 cmap = a->garray; 5471 nzA = a->A->cmap->n; 5472 nzB = a->B->cmap->n; 5473 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5474 ncols = 0; 5475 for (i = 0; i < nzB; i++) { 5476 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5477 else break; 5478 } 5479 imark = i; 5480 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5481 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5482 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5483 } else { 5484 iscola = *col; 5485 } 5486 if (scall != MAT_INITIAL_MATRIX) { 5487 PetscCall(PetscMalloc1(1, &aloc)); 5488 aloc[0] = *A_loc; 5489 } 5490 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5491 if (!col) { /* attach global id of condensed columns */ 5492 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5493 } 5494 *A_loc = aloc[0]; 5495 PetscCall(PetscFree(aloc)); 5496 if (!row) PetscCall(ISDestroy(&isrowa)); 5497 if 
(!col) PetscCall(ISDestroy(&iscola)); 5498 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5499 PetscFunctionReturn(PETSC_SUCCESS); 5500 } 5501 5502 /* 5503 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5504 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5505 * on a global size. 5506 * */ 5507 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5508 { 5509 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5510 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5511 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5512 PetscMPIInt owner; 5513 PetscSFNode *iremote, *oiremote; 5514 const PetscInt *lrowindices; 5515 PetscSF sf, osf; 5516 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5517 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5518 MPI_Comm comm; 5519 ISLocalToGlobalMapping mapping; 5520 const PetscScalar *pd_a, *po_a; 5521 5522 PetscFunctionBegin; 5523 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5524 /* plocalsize is the number of roots 5525 * nrows is the number of leaves 5526 * */ 5527 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5528 PetscCall(ISGetLocalSize(rows, &nrows)); 5529 PetscCall(PetscCalloc1(nrows, &iremote)); 5530 PetscCall(ISGetIndices(rows, &lrowindices)); 5531 for (i = 0; i < nrows; i++) { 5532 /* Find a remote index and an owner for a row 5533 * The row could be local or remote 5534 * */ 5535 owner = 0; 5536 lidx = 0; 5537 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5538 iremote[i].index = lidx; 5539 iremote[i].rank = owner; 5540 } 5541 /* Create SF to communicate how many nonzero columns for each row */ 5542 PetscCall(PetscSFCreate(comm, &sf)); 5543 /* SF will figure out the number of nonzero colunms for each row, and their 5544 * offsets 5545 * */ 5546 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5547 PetscCall(PetscSFSetFromOptions(sf)); 5548 PetscCall(PetscSFSetUp(sf)); 5549 5550 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5551 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5552 PetscCall(PetscCalloc1(nrows, &pnnz)); 5553 roffsets[0] = 0; 5554 roffsets[1] = 0; 5555 for (i = 0; i < plocalsize; i++) { 5556 /* diag */ 5557 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5558 /* off diag */ 5559 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5560 /* compute offsets so that we relative location for each row */ 5561 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5562 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5563 } 5564 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5565 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5566 /* 'r' means root, and 'l' means leaf */ 5567 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5568 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5569 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5570 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5571 PetscCall(PetscSFDestroy(&sf)); 5572 PetscCall(PetscFree(roffsets)); 5573 PetscCall(PetscFree(nrcols)); 5574 dntotalcols = 0; 5575 ontotalcols = 0; 5576 ncol = 0; 5577 for (i = 0; i < nrows; i++) { 5578 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5579 ncol = PetscMax(pnnz[i], ncol); 5580 /* diag */ 5581 dntotalcols += nlcols[i * 2 + 0]; 5582 /* off diag */ 5583 ontotalcols += nlcols[i * 2 + 1]; 5584 } 5585 /* We do not need to figure the right number of columns 5586 * since all the calculations will be done by going through the raw data 5587 * */ 5588 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5589 PetscCall(MatSetUp(*P_oth)); 5590 PetscCall(PetscFree(pnnz)); 5591 p_oth = (Mat_SeqAIJ 
*)(*P_oth)->data; 5592 /* diag */ 5593 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5594 /* off diag */ 5595 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5596 /* diag */ 5597 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5598 /* off diag */ 5599 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5600 dntotalcols = 0; 5601 ontotalcols = 0; 5602 ntotalcols = 0; 5603 for (i = 0; i < nrows; i++) { 5604 owner = 0; 5605 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5606 /* Set iremote for diag matrix */ 5607 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5608 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5609 iremote[dntotalcols].rank = owner; 5610 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5611 ilocal[dntotalcols++] = ntotalcols++; 5612 } 5613 /* off diag */ 5614 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5615 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5616 oiremote[ontotalcols].rank = owner; 5617 oilocal[ontotalcols++] = ntotalcols++; 5618 } 5619 } 5620 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5621 PetscCall(PetscFree(loffsets)); 5622 PetscCall(PetscFree(nlcols)); 5623 PetscCall(PetscSFCreate(comm, &sf)); 5624 /* P serves as roots and P_oth is leaves 5625 * Diag matrix 5626 * */ 5627 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5628 PetscCall(PetscSFSetFromOptions(sf)); 5629 PetscCall(PetscSFSetUp(sf)); 5630 5631 PetscCall(PetscSFCreate(comm, &osf)); 5632 /* Off diag */ 5633 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5634 PetscCall(PetscSFSetFromOptions(osf)); 5635 PetscCall(PetscSFSetUp(osf)); 5636 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5637 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5638 /* We operate on the matrix internal data for saving memory */ 5639 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, 
p_oth->a, MPI_REPLACE)); 5640 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5641 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5642 /* Convert to global indices for diag matrix */ 5643 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5644 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5645 /* We want P_oth store global indices */ 5646 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5647 /* Use memory scalable approach */ 5648 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5649 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5650 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5651 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5652 /* Convert back to local indices */ 5653 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5654 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5655 nout = 0; 5656 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5657 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5658 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5659 /* Exchange values */ 5660 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5661 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5662 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5663 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5664 /* Stop PETSc from shrinking memory */ 5665 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5666 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5667 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5668 /* Attach PetscSF objects to P_oth so that 
we can reuse it later */ 5669 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5670 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5671 PetscCall(PetscSFDestroy(&sf)); 5672 PetscCall(PetscSFDestroy(&osf)); 5673 PetscFunctionReturn(PETSC_SUCCESS); 5674 } 5675 5676 /* 5677 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5678 * This supports MPIAIJ and MAIJ 5679 * */ 5680 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5681 { 5682 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5683 Mat_SeqAIJ *p_oth; 5684 IS rows, map; 5685 PetscHMapI hamp; 5686 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5687 MPI_Comm comm; 5688 PetscSF sf, osf; 5689 PetscBool has; 5690 5691 PetscFunctionBegin; 5692 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5693 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5694 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5695 * and then create a submatrix (that often is an overlapping matrix) 5696 * */ 5697 if (reuse == MAT_INITIAL_MATRIX) { 5698 /* Use a hash table to figure out unique keys */ 5699 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5700 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5701 count = 0; 5702 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5703 for (i = 0; i < a->B->cmap->n; i++) { 5704 key = a->garray[i] / dof; 5705 PetscCall(PetscHMapIHas(hamp, key, &has)); 5706 if (!has) { 5707 mapping[i] = count; 5708 PetscCall(PetscHMapISet(hamp, key, count++)); 5709 } else { 5710 /* Current 'i' has the same value the previous step */ 5711 mapping[i] = count - 1; 5712 } 5713 } 5714 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5715 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5716 PetscCheck(htsize == count, comm, 
PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5717 PetscCall(PetscCalloc1(htsize, &rowindices)); 5718 off = 0; 5719 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5720 PetscCall(PetscHMapIDestroy(&hamp)); 5721 PetscCall(PetscSortInt(htsize, rowindices)); 5722 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5723 /* In case, the matrix was already created but users want to recreate the matrix */ 5724 PetscCall(MatDestroy(P_oth)); 5725 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5726 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5727 PetscCall(ISDestroy(&map)); 5728 PetscCall(ISDestroy(&rows)); 5729 } else if (reuse == MAT_REUSE_MATRIX) { 5730 /* If matrix was already created, we simply update values using SF objects 5731 * that as attached to the matrix earlier. 5732 */ 5733 const PetscScalar *pd_a, *po_a; 5734 5735 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5736 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5737 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5738 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5739 /* Update values in place */ 5740 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5741 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5742 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5743 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5744 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5745 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5746 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5747 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5748 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5749 
PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5750 PetscFunctionReturn(PETSC_SUCCESS); 5751 } 5752 5753 /*@C 5754 MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A 5755 5756 Collective 5757 5758 Input Parameters: 5759 + A - the first matrix in `MATMPIAIJ` format 5760 . B - the second matrix in `MATMPIAIJ` format 5761 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5762 5763 Output Parameters: 5764 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5765 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5766 - B_seq - the sequential matrix generated 5767 5768 Level: developer 5769 5770 @*/ 5771 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5772 { 5773 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5774 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5775 IS isrowb, iscolb; 5776 Mat *bseq = NULL; 5777 5778 PetscFunctionBegin; 5779 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5780 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5781 } 5782 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5783 5784 if (scall == MAT_INITIAL_MATRIX) { 5785 start = A->cmap->rstart; 5786 cmap = a->garray; 5787 nzA = a->A->cmap->n; 5788 nzB = a->B->cmap->n; 5789 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5790 ncols = 0; 5791 for (i = 0; i < nzB; i++) { /* row < local row index */ 5792 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5793 else break; 5794 } 5795 imark = i; 5796 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5797 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5798 
PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5799 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5800 } else { 5801 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5802 isrowb = *rowb; 5803 iscolb = *colb; 5804 PetscCall(PetscMalloc1(1, &bseq)); 5805 bseq[0] = *B_seq; 5806 } 5807 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5808 *B_seq = bseq[0]; 5809 PetscCall(PetscFree(bseq)); 5810 if (!rowb) { 5811 PetscCall(ISDestroy(&isrowb)); 5812 } else { 5813 *rowb = isrowb; 5814 } 5815 if (!colb) { 5816 PetscCall(ISDestroy(&iscolb)); 5817 } else { 5818 *colb = iscolb; 5819 } 5820 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5821 PetscFunctionReturn(PETSC_SUCCESS); 5822 } 5823 5824 /* 5825 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5826 of the OFF-DIAGONAL portion of local A 5827 5828 Collective 5829 5830 Input Parameters: 5831 + A,B - the matrices in `MATMPIAIJ` format 5832 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5833 5834 Output Parameter: 5835 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5836 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5837 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5838 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5839 5840 Developer Note: 5841 This directly accesses information inside the VecScatter associated with the matrix-vector product 5842 for this matrix. This is not desirable.. 
5843 5844 Level: developer 5845 5846 */ 5847 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5848 { 5849 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5850 Mat_SeqAIJ *b_oth; 5851 VecScatter ctx; 5852 MPI_Comm comm; 5853 const PetscMPIInt *rprocs, *sprocs; 5854 const PetscInt *srow, *rstarts, *sstarts; 5855 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5856 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5857 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5858 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5859 PetscMPIInt size, tag, rank, nreqs; 5860 5861 PetscFunctionBegin; 5862 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5863 PetscCallMPI(MPI_Comm_size(comm, &size)); 5864 5865 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5866 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5867 } 5868 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5869 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5870 5871 if (size == 1) { 5872 startsj_s = NULL; 5873 bufa_ptr = NULL; 5874 *B_oth = NULL; 5875 PetscFunctionReturn(PETSC_SUCCESS); 5876 } 5877 5878 ctx = a->Mvctx; 5879 tag = ((PetscObject)ctx)->tag; 5880 5881 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5882 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5883 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 
5884 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5885 PetscCall(PetscMalloc1(nreqs, &reqs)); 5886 rwaits = reqs; 5887 swaits = reqs + nrecvs; 5888 5889 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5890 if (scall == MAT_INITIAL_MATRIX) { 5891 /* i-array */ 5892 /* post receives */ 5893 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5894 for (i = 0; i < nrecvs; i++) { 5895 rowlen = rvalues + rstarts[i] * rbs; 5896 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5897 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5898 } 5899 5900 /* pack the outgoing message */ 5901 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5902 5903 sstartsj[0] = 0; 5904 rstartsj[0] = 0; 5905 len = 0; /* total length of j or a array to be sent */ 5906 if (nsends) { 5907 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5908 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5909 } 5910 for (i = 0; i < nsends; i++) { 5911 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5912 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5913 for (j = 0; j < nrows; j++) { 5914 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5915 for (l = 0; l < sbs; l++) { 5916 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5917 5918 rowlen[j * sbs + l] = ncols; 5919 5920 len += ncols; 5921 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5922 } 5923 k++; 5924 } 5925 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5926 5927 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5928 } 5929 /* recvs and sends of i-array are completed */ 5930 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5931 
PetscCall(PetscFree(svalues)); 5932 5933 /* allocate buffers for sending j and a arrays */ 5934 PetscCall(PetscMalloc1(len + 1, &bufj)); 5935 PetscCall(PetscMalloc1(len + 1, &bufa)); 5936 5937 /* create i-array of B_oth */ 5938 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5939 5940 b_othi[0] = 0; 5941 len = 0; /* total length of j or a array to be received */ 5942 k = 0; 5943 for (i = 0; i < nrecvs; i++) { 5944 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5945 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5946 for (j = 0; j < nrows; j++) { 5947 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5948 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5949 k++; 5950 } 5951 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5952 } 5953 PetscCall(PetscFree(rvalues)); 5954 5955 /* allocate space for j and a arrays of B_oth */ 5956 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5957 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5958 5959 /* j-array */ 5960 /* post receives of j-array */ 5961 for (i = 0; i < nrecvs; i++) { 5962 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5963 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5964 } 5965 5966 /* pack the outgoing message j-array */ 5967 if (nsends) k = sstarts[0]; 5968 for (i = 0; i < nsends; i++) { 5969 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5970 bufJ = bufj + sstartsj[i]; 5971 for (j = 0; j < nrows; j++) { 5972 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5973 for (ll = 0; ll < sbs; ll++) { 5974 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5975 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5976 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5977 } 5978 } 5979 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5980 } 5981 5982 /* recvs 
and sends of j-array are completed */ 5983 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5984 } else if (scall == MAT_REUSE_MATRIX) { 5985 sstartsj = *startsj_s; 5986 rstartsj = *startsj_r; 5987 bufa = *bufa_ptr; 5988 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5989 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5990 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5991 5992 /* a-array */ 5993 /* post receives of a-array */ 5994 for (i = 0; i < nrecvs; i++) { 5995 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5996 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5997 } 5998 5999 /* pack the outgoing message a-array */ 6000 if (nsends) k = sstarts[0]; 6001 for (i = 0; i < nsends; i++) { 6002 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6003 bufA = bufa + sstartsj[i]; 6004 for (j = 0; j < nrows; j++) { 6005 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6006 for (ll = 0; ll < sbs; ll++) { 6007 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6008 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6009 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6010 } 6011 } 6012 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6013 } 6014 /* recvs and sends of a-array are completed */ 6015 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6016 PetscCall(PetscFree(reqs)); 6017 6018 if (scall == MAT_INITIAL_MATRIX) { 6019 /* put together the new matrix */ 6020 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6021 6022 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6023 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6024 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6025 b_oth->free_a = PETSC_TRUE; 6026 b_oth->free_ij = PETSC_TRUE; 6027 b_oth->nonew = 0; 6028 6029 PetscCall(PetscFree(bufj)); 6030 if (!startsj_s || !bufa_ptr) { 6031 PetscCall(PetscFree2(sstartsj, rstartsj)); 6032 PetscCall(PetscFree(bufa_ptr)); 6033 } else { 6034 *startsj_s = sstartsj; 6035 *startsj_r = rstartsj; 6036 *bufa_ptr = bufa; 6037 } 6038 } else if (scall == MAT_REUSE_MATRIX) { 6039 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6040 } 6041 6042 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6043 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6044 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6045 PetscFunctionReturn(PETSC_SUCCESS); 6046 } 6047 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6051 #if defined(PETSC_HAVE_MKL_SPARSE) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6056 #if defined(PETSC_HAVE_ELEMENTAL) 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6058 #endif 6059 #if defined(PETSC_HAVE_SCALAPACK) 6060 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6061 #endif 6062 #if defined(PETSC_HAVE_HYPRE) 6063 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6064 #endif 6065 #if defined(PETSC_HAVE_CUDA) 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6067 #endif 6068 #if defined(PETSC_HAVE_HIP) 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6070 #endif 6071 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6073 #endif 6074 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6075 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6076 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6077 6078 /* 6079 Computes (B'*A')' since computing B*A directly is untenable 6080 6081 n p p 6082 [ ] [ ] [ ] 6083 m [ A ] * n [ B ] = m [ C ] 6084 [ ] [ ] [ ] 6085 6086 */ 6087 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6088 { 6089 Mat At, Bt, Ct; 6090 6091 PetscFunctionBegin; 6092 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6093 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6094 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6095 PetscCall(MatDestroy(&At)); 6096 PetscCall(MatDestroy(&Bt)); 6097 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6098 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6099 PetscCall(MatDestroy(&Ct)); 6100 PetscFunctionReturn(PETSC_SUCCESS); 6101 } 6102 6103 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6104 { 6105 PetscBool cisdense; 6106 6107 PetscFunctionBegin; 6108 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6109 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6110 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6111 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6112 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6113 
PetscCall(MatSetUp(C)); 6114 6115 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6116 PetscFunctionReturn(PETSC_SUCCESS); 6117 } 6118 6119 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6120 { 6121 Mat_Product *product = C->product; 6122 Mat A = product->A, B = product->B; 6123 6124 PetscFunctionBegin; 6125 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6126 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6127 6128 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6129 C->ops->productsymbolic = MatProductSymbolic_AB; 6130 PetscFunctionReturn(PETSC_SUCCESS); 6131 } 6132 6133 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6134 { 6135 Mat_Product *product = C->product; 6136 6137 PetscFunctionBegin; 6138 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6139 PetscFunctionReturn(PETSC_SUCCESS); 6140 } 6141 6142 /* 6143 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6144 6145 Input Parameters: 6146 6147 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6148 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6149 6150 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6151 6152 For Set1, j1[] contains column indices of the nonzeros. 6153 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6154 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6155 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6156 6157 Similar for Set2. 
6158 6159 This routine merges the two sets of nonzeros row by row and removes repeats. 6160 6161 Output Parameters: (memory is allocated by the caller) 6162 6163 i[],j[]: the CSR of the merged matrix, which has m rows. 6164 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6165 imap2[]: similar to imap1[], but for Set2. 6166 Note we order nonzeros row-by-row and from left to right. 6167 */ 6168 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6169 { 6170 PetscInt r, m; /* Row index of mat */ 6171 PetscCount t, t1, t2, b1, e1, b2, e2; 6172 6173 PetscFunctionBegin; 6174 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6175 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6176 i[0] = 0; 6177 for (r = 0; r < m; r++) { /* Do row by row merging */ 6178 b1 = rowBegin1[r]; 6179 e1 = rowEnd1[r]; 6180 b2 = rowBegin2[r]; 6181 e2 = rowEnd2[r]; 6182 while (b1 < e1 && b2 < e2) { 6183 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6184 j[t] = j1[b1]; 6185 imap1[t1] = t; 6186 imap2[t2] = t; 6187 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6188 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6189 t1++; 6190 t2++; 6191 t++; 6192 } else if (j1[b1] < j2[b2]) { 6193 j[t] = j1[b1]; 6194 imap1[t1] = t; 6195 b1 += jmap1[t1 + 1] - jmap1[t1]; 6196 t1++; 6197 t++; 6198 } else { 6199 j[t] = j2[b2]; 6200 imap2[t2] = t; 6201 b2 += jmap2[t2 + 1] - jmap2[t2]; 6202 t2++; 6203 t++; 6204 } 6205 } 6206 /* Merge the remaining in either j1[] or j2[] */ 6207 while (b1 < e1) { 6208 j[t] = j1[b1]; 6209 imap1[t1] = t; 6210 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6211 t1++; 6212 t++; 6213 } 6214 while (b2 < e2) { 6215 j[t] = j2[b2]; 6216 imap2[t2] = t; 6217 b2 += jmap2[t2 + 1] - jmap2[t2]; 6218 t2++; 6219 t++; 6220 } 6221 i[r + 1] = t; 6222 } 6223 PetscFunctionReturn(PETSC_SUCCESS); 6224 } 6225 6226 /* 6227 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6228 6229 Input Parameters: 6230 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6231 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6232 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6233 6234 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6235 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6236 6237 Output Parameters: 6238 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6239 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6240 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6241 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6242 6243 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6244 Atot: number of entries belonging to the diagonal block. 6245 Annz: number of unique nonzeros belonging to the diagonal block. 6246 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6247 repeats (i.e., same 'i,j' pair). 6248 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6249 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6250 6251 Atot: number of entries belonging to the diagonal block 6252 Annz: number of unique nonzeros belonging to the diagonal block. 6253 6254 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6255 6256 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6257 */ 6258 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6259 { 6260 PetscInt cstart, cend, rstart, rend, row, col; 6261 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6262 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6263 PetscCount k, m, p, q, r, s, mid; 6264 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6265 6266 PetscFunctionBegin; 6267 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6268 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6269 m = rend - rstart; 6270 6271 for (k = 0; k < n; k++) { 6272 if (i[k] >= 0) break; 6273 } /* Skip negative rows */ 6274 6275 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6276 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6277 */ 6278 while (k < n) { 6279 row = i[k]; 6280 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6281 for (s = k; s < n; s++) 6282 if (i[s] != row) break; 6283 for (p = k; p < s; p++) { 6284 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6285 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6286 } 6287 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6288 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6289 rowBegin[row - rstart] = k; 6290 rowMid[row - rstart] = mid; 6291 rowEnd[row - rstart] = s; 6292 6293 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6294 Atot += mid - k; 6295 Btot += s - mid; 6296 6297 /* Count unique nonzeros of this diag/offdiag row */ 6298 for (p = k; p < mid;) { 6299 col = j[p]; 6300 do { 6301 j[p] += PETSC_MAX_INT; 6302 p++; 6303 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6304 Annz++; 6305 } 6306 6307 for (p = mid; p < s;) { 6308 col = j[p]; 6309 do { 6310 p++; 6311 } while (p < s && j[p] == col); 6312 Bnnz++; 6313 } 6314 k = s; 6315 } 6316 6317 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6318 PetscCall(PetscMalloc1(Atot, &Aperm)); 6319 PetscCall(PetscMalloc1(Btot, &Bperm)); 6320 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6321 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6322 6323 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6324 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6325 for (r = 0; r < m; r++) { 6326 k = rowBegin[r]; 6327 mid = rowMid[r]; 6328 s = rowEnd[r]; 6329 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6330 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6331 Atot += mid - k; 6332 Btot += s - mid; 6333 6334 
/* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6335 for (p = k; p < mid;) { 6336 col = j[p]; 6337 q = p; 6338 do { 6339 p++; 6340 } while (p < mid && j[p] == col); 6341 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6342 Annz++; 6343 } 6344 6345 for (p = mid; p < s;) { 6346 col = j[p]; 6347 q = p; 6348 do { 6349 p++; 6350 } while (p < s && j[p] == col); 6351 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6352 Bnnz++; 6353 } 6354 } 6355 /* Output */ 6356 *Aperm_ = Aperm; 6357 *Annz_ = Annz; 6358 *Atot_ = Atot; 6359 *Ajmap_ = Ajmap; 6360 *Bperm_ = Bperm; 6361 *Bnnz_ = Bnnz; 6362 *Btot_ = Btot; 6363 *Bjmap_ = Bjmap; 6364 PetscFunctionReturn(PETSC_SUCCESS); 6365 } 6366 6367 /* 6368 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6369 6370 Input Parameters: 6371 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6372 nnz: number of unique nonzeros in the merged matrix 6373 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6374 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6375 6376 Output Parameter: (memory is allocated by the caller) 6377 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6378 6379 Example: 6380 nnz1 = 4 6381 nnz = 6 6382 imap = [1,3,4,5] 6383 jmap = [0,3,5,6,7] 6384 then, 6385 jmap_new = [0,0,3,3,5,6,7] 6386 */ 6387 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6388 { 6389 PetscCount k, p; 6390 6391 PetscFunctionBegin; 6392 jmap_new[0] = 0; 6393 p = nnz; /* p loops over jmap_new[] backwards */ 6394 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6395 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6396 } 6397 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6398 PetscFunctionReturn(PETSC_SUCCESS); 6399 } 6400 6401 
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6402 { 6403 MPI_Comm comm; 6404 PetscMPIInt rank, size; 6405 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6406 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6407 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6408 6409 PetscFunctionBegin; 6410 PetscCall(PetscFree(mpiaij->garray)); 6411 PetscCall(VecDestroy(&mpiaij->lvec)); 6412 #if defined(PETSC_USE_CTABLE) 6413 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6414 #else 6415 PetscCall(PetscFree(mpiaij->colmap)); 6416 #endif 6417 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6418 mat->assembled = PETSC_FALSE; 6419 mat->was_assembled = PETSC_FALSE; 6420 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6421 6422 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6423 PetscCallMPI(MPI_Comm_size(comm, &size)); 6424 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6425 PetscCall(PetscLayoutSetUp(mat->rmap)); 6426 PetscCall(PetscLayoutSetUp(mat->cmap)); 6427 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6428 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6429 PetscCall(MatGetLocalSize(mat, &m, &n)); 6430 PetscCall(MatGetSize(mat, &M, &N)); 6431 6432 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6433 /* entries come first, then local rows, then remote rows. */ 6434 PetscCount n1 = coo_n, *perm1; 6435 PetscInt *i1 = coo_i, *j1 = coo_j; 6436 6437 PetscCall(PetscMalloc1(n1, &perm1)); 6438 for (k = 0; k < n1; k++) perm1[k] = k; 6439 6440 /* Manipulate indices so that entries with negative row or col indices will have smallest 6441 row indices, local entries will have greater but negative row indices, and remote entries 6442 will have positive row indices. 
6443 */ 6444 for (k = 0; k < n1; k++) { 6445 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6446 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6447 else { 6448 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6449 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6450 } 6451 } 6452 6453 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6454 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6455 for (k = 0; k < n1; k++) { 6456 if (i1[k] > PETSC_MIN_INT) break; 6457 } /* Advance k to the first entry we need to take care of */ 6458 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6459 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6460 6461 /* Split local rows into diag/offdiag portions */ 6462 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6463 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6464 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6465 6466 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6467 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6468 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6469 6470 /* Send remote rows to their owner */ 6471 /* Find which rows should be sent to which remote ranks*/ 6472 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6473 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6474 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and 
error if not */ 6475 const PetscInt *ranges; 6476 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6477 6478 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6479 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6480 for (k = rem; k < n1;) { 6481 PetscMPIInt owner; 6482 PetscInt firstRow, lastRow; 6483 6484 /* Locate a row range */ 6485 firstRow = i1[k]; /* first row of this owner */ 6486 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6487 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6488 6489 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6490 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6491 6492 /* All entries in [k,p) belong to this remote owner */ 6493 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6494 PetscMPIInt *sendto2; 6495 PetscInt *nentries2; 6496 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6497 6498 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6499 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6500 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6501 PetscCall(PetscFree2(sendto, nentries2)); 6502 sendto = sendto2; 6503 nentries = nentries2; 6504 maxNsend = maxNsend2; 6505 } 6506 sendto[nsend] = owner; 6507 nentries[nsend] = p - k; 6508 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6509 nsend++; 6510 k = p; 6511 } 6512 6513 /* Build 1st SF to know offsets on remote to send data */ 6514 PetscSF sf1; 6515 PetscInt nroots = 1, nroots2 = 0; 6516 PetscInt nleaves = nsend, nleaves2 = 0; 6517 PetscInt *offsets; 6518 PetscSFNode *iremote; 6519 6520 PetscCall(PetscSFCreate(comm, &sf1)); 6521 PetscCall(PetscMalloc1(nsend, &iremote)); 6522 PetscCall(PetscMalloc1(nsend, &offsets)); 6523 for (k = 0; k < nsend; k++) { 6524 iremote[k].rank = sendto[k]; 6525 iremote[k].index = 0; 6526 nleaves2 += nentries[k]; 6527 
PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6528 } 6529 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6530 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6531 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6532 PetscCall(PetscSFDestroy(&sf1)); 6533 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6534 6535 /* Build 2nd SF to send remote COOs to their owner */ 6536 PetscSF sf2; 6537 nroots = nroots2; 6538 nleaves = nleaves2; 6539 PetscCall(PetscSFCreate(comm, &sf2)); 6540 PetscCall(PetscSFSetFromOptions(sf2)); 6541 PetscCall(PetscMalloc1(nleaves, &iremote)); 6542 p = 0; 6543 for (k = 0; k < nsend; k++) { 6544 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6545 for (q = 0; q < nentries[k]; q++, p++) { 6546 iremote[p].rank = sendto[k]; 6547 iremote[p].index = offsets[k] + q; 6548 } 6549 } 6550 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6551 6552 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6553 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6554 6555 /* Send the remote COOs to their owner */ 6556 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6557 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6558 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6563 6564 PetscCall(PetscFree(offsets)); 6565 PetscCall(PetscFree2(sendto, nentries)); 6566 6567 /* Sort received COOs by row along with the permutation array */ 6568 for (k = 0; k < n2; k++) perm2[k] = k; 6569 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6570 6571 /* Split received COOs into diag/offdiag portions */ 6572 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6573 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6574 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6575 6576 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6577 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6578 6579 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6580 PetscInt *Ai, *Bi; 6581 PetscInt *Aj, *Bj; 6582 6583 PetscCall(PetscMalloc1(m + 1, &Ai)); 6584 PetscCall(PetscMalloc1(m + 1, &Bi)); 6585 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 
6586 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6587 6588 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6589 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6590 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6591 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6592 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6593 6594 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6595 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6596 6597 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6598 /* expect nonzeros in A/B most likely have local contributing entries */ 6599 PetscInt Annz = Ai[m]; 6600 PetscInt Bnnz = Bi[m]; 6601 PetscCount *Ajmap1_new, *Bjmap1_new; 6602 6603 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6604 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6605 6606 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6607 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6608 6609 PetscCall(PetscFree(Aimap1)); 6610 PetscCall(PetscFree(Ajmap1)); 6611 PetscCall(PetscFree(Bimap1)); 6612 PetscCall(PetscFree(Bjmap1)); 6613 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6614 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6615 PetscCall(PetscFree(perm1)); 6616 PetscCall(PetscFree3(i2, j2, perm2)); 6617 6618 Ajmap1 = Ajmap1_new; 6619 Bjmap1 = Bjmap1_new; 6620 6621 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6622 if (Annz < Annz1 + Annz2) { 6623 PetscInt *Aj_new; 6624 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6625 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6626 PetscCall(PetscFree(Aj)); 6627 Aj = Aj_new; 6628 } 6629 6630 if (Bnnz < Bnnz1 + Bnnz2) { 6631 PetscInt *Bj_new; 6632 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6633 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6634 PetscCall(PetscFree(Bj)); 6635 Bj = Bj_new; 6636 } 
6637 6638 /* Create new submatrices for on-process and off-process coupling */ 6639 PetscScalar *Aa, *Ba; 6640 MatType rtype; 6641 Mat_SeqAIJ *a, *b; 6642 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6643 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6644 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6645 if (cstart) { 6646 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6647 } 6648 PetscCall(MatDestroy(&mpiaij->A)); 6649 PetscCall(MatDestroy(&mpiaij->B)); 6650 PetscCall(MatGetRootType_Private(mat, &rtype)); 6651 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6652 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6653 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6654 6655 a = (Mat_SeqAIJ *)mpiaij->A->data; 6656 b = (Mat_SeqAIJ *)mpiaij->B->data; 6657 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6658 a->free_a = b->free_a = PETSC_TRUE; 6659 a->free_ij = b->free_ij = PETSC_TRUE; 6660 6661 /* conversion must happen AFTER multiply setup */ 6662 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6663 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6664 PetscCall(VecDestroy(&mpiaij->lvec)); 6665 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6666 6667 mpiaij->coo_n = coo_n; 6668 mpiaij->coo_sf = sf2; 6669 mpiaij->sendlen = nleaves; 6670 mpiaij->recvlen = nroots; 6671 6672 mpiaij->Annz = Annz; 6673 mpiaij->Bnnz = Bnnz; 6674 6675 mpiaij->Annz2 = Annz2; 6676 mpiaij->Bnnz2 = Bnnz2; 6677 6678 mpiaij->Atot1 = Atot1; 6679 mpiaij->Atot2 = Atot2; 6680 mpiaij->Btot1 = Btot1; 6681 mpiaij->Btot2 = Btot2; 6682 6683 mpiaij->Ajmap1 = Ajmap1; 6684 mpiaij->Aperm1 = Aperm1; 6685 6686 mpiaij->Bjmap1 = Bjmap1; 6687 mpiaij->Bperm1 = Bperm1; 6688 6689 mpiaij->Aimap2 = Aimap2; 6690 mpiaij->Ajmap2 = Ajmap2; 6691 mpiaij->Aperm2 = Aperm2; 6692 6693 mpiaij->Bimap2 = Bimap2; 6694 
mpiaij->Bjmap2 = Bjmap2; 6695 mpiaij->Bperm2 = Bperm2; 6696 6697 mpiaij->Cperm1 = Cperm1; 6698 6699 /* Allocate in preallocation. If not used, it has zero cost on host */ 6700 PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf)); 6701 PetscFunctionReturn(PETSC_SUCCESS); 6702 } 6703 6704 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6705 { 6706 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6707 Mat A = mpiaij->A, B = mpiaij->B; 6708 PetscCount Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2; 6709 PetscScalar *Aa, *Ba; 6710 PetscScalar *sendbuf = mpiaij->sendbuf; 6711 PetscScalar *recvbuf = mpiaij->recvbuf; 6712 const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2; 6713 const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2; 6714 const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2; 6715 const PetscCount *Cperm1 = mpiaij->Cperm1; 6716 6717 PetscFunctionBegin; 6718 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6719 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6720 6721 /* Pack entries to be sent to remote */ 6722 for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6723 6724 /* Send remote entries to their owner and overlap the communication with local computation */ 6725 PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6726 /* Add local entries to A and B */ 6727 for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6728 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6729 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 
6730 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6731 } 6732 for (PetscCount i = 0; i < Bnnz; i++) { 6733 PetscScalar sum = 0.0; 6734 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6735 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6736 } 6737 PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6738 6739 /* Add received remote entries to A and B */ 6740 for (PetscCount i = 0; i < Annz2; i++) { 6741 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6742 } 6743 for (PetscCount i = 0; i < Bnnz2; i++) { 6744 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6745 } 6746 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6747 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6748 PetscFunctionReturn(PETSC_SUCCESS); 6749 } 6750 6751 /*MC 6752 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6753 6754 Options Database Keys: 6755 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6756 6757 Level: beginner 6758 6759 Notes: 6760 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6761 in this case the values associated with the rows and columns one passes in are set to zero 6762 in the matrix 6763 6764 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6765 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6766 6767 .seealso: [](chapter_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6768 M*/ 6769 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6770 { 6771 Mat_MPIAIJ *b; 6772 PetscMPIInt size; 6773 6774 PetscFunctionBegin; 6775 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6776 6777 PetscCall(PetscNew(&b)); 6778 B->data = (void *)b; 6779 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 6780 B->assembled = PETSC_FALSE; 6781 B->insertmode = NOT_SET_VALUES; 6782 b->size = size; 6783 6784 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6785 6786 /* build cache for off array entries formed */ 6787 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6788 6789 b->donotstash = PETSC_FALSE; 6790 b->colmap = NULL; 6791 b->garray = NULL; 6792 b->roworiented = PETSC_TRUE; 6793 6794 /* stuff used for matrix vector multiply */ 6795 b->lvec = NULL; 6796 b->Mvctx = NULL; 6797 6798 /* stuff for MatGetRow() */ 6799 b->rowindices = NULL; 6800 b->rowvalues = NULL; 6801 b->getrowactive = PETSC_FALSE; 6802 6803 /* flexible pointer used in CUSPARSE classes */ 6804 b->spptr = NULL; 6805 6806 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6807 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6808 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6809 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6810 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6811 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", 
MatResetPreallocation_MPIAIJ)); 6812 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6813 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6814 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6815 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6816 #if defined(PETSC_HAVE_CUDA) 6817 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6818 #endif 6819 #if defined(PETSC_HAVE_HIP) 6820 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6821 #endif 6822 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6823 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6824 #endif 6825 #if defined(PETSC_HAVE_MKL_SPARSE) 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6827 #endif 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6830 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6831 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6832 #if defined(PETSC_HAVE_ELEMENTAL) 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6834 #endif 6835 #if defined(PETSC_HAVE_SCALAPACK) 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6837 #endif 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6840 #if defined(PETSC_HAVE_HYPRE) 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6843 #endif 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6848 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6849 PetscFunctionReturn(PETSC_SUCCESS); 6850 } 6851 6852 /*@C 6853 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6854 and "off-diagonal" part of the matrix in CSR format. 6855 6856 Collective 6857 6858 Input Parameters: 6859 + comm - MPI communicator 6860 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6861 . n - This value should be the same as the local size used in creating the 6862 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 6863 calculated if `N` is given) For square matrices `n` is almost always `m`. 6864 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6865 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6866 . 
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The `i`, `j`, `a`, `oi`, `oj`, and `oa` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The `i`, `j`, `oi`, and `oj` indices are 0 based

   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use `MatSetValues()` (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
   communication if it is known that only local entries will be set.
6894 6895 .seealso: [](chapter_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6896 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6897 @*/ 6898 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6899 { 6900 Mat_MPIAIJ *maij; 6901 6902 PetscFunctionBegin; 6903 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6904 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6905 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6906 PetscCall(MatCreate(comm, mat)); 6907 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6908 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6909 maij = (Mat_MPIAIJ *)(*mat)->data; 6910 6911 (*mat)->preallocated = PETSC_TRUE; 6912 6913 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6914 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6915 6916 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6917 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6918 6919 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6920 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6921 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6922 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6923 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6924 PetscFunctionReturn(PETSC_SUCCESS); 6925 } 6926 6927 typedef struct { 6928 Mat *mp; /* intermediate products */ 6929 PetscBool *mptmp; /* is the intermediate product temporary ? 
*/ 6930 PetscInt cp; /* number of intermediate products */ 6931 6932 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6933 PetscInt *startsj_s, *startsj_r; 6934 PetscScalar *bufa; 6935 Mat P_oth; 6936 6937 /* may take advantage of merging product->B */ 6938 Mat Bloc; /* B-local by merging diag and off-diag */ 6939 6940 /* cusparse does not have support to split between symbolic and numeric phases. 6941 When api_user is true, we don't need to update the numerical values 6942 of the temporary storage */ 6943 PetscBool reusesym; 6944 6945 /* support for COO values insertion */ 6946 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6947 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6948 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6949 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6950 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6951 PetscMemType mtype; 6952 6953 /* customization */ 6954 PetscBool abmerge; 6955 PetscBool P_oth_bind; 6956 } MatMatMPIAIJBACKEND; 6957 6958 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6959 { 6960 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6961 PetscInt i; 6962 6963 PetscFunctionBegin; 6964 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6965 PetscCall(PetscFree(mmdata->bufa)); 6966 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6967 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6968 PetscCall(MatDestroy(&mmdata->P_oth)); 6969 PetscCall(MatDestroy(&mmdata->Bloc)); 6970 PetscCall(PetscSFDestroy(&mmdata->sf)); 6971 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6972 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6973 PetscCall(PetscFree(mmdata->own[0])); 6974 PetscCall(PetscFree(mmdata->own)); 6975 
PetscCall(PetscFree(mmdata->off[0])); 6976 PetscCall(PetscFree(mmdata->off)); 6977 PetscCall(PetscFree(mmdata)); 6978 PetscFunctionReturn(PETSC_SUCCESS); 6979 } 6980 6981 /* Copy selected n entries with indices in idx[] of A to v[]. 6982 If idx is NULL, copy the whole data array of A to v[] 6983 */ 6984 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 6985 { 6986 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 6987 6988 PetscFunctionBegin; 6989 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 6990 if (f) { 6991 PetscCall((*f)(A, n, idx, v)); 6992 } else { 6993 const PetscScalar *vv; 6994 6995 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 6996 if (n && idx) { 6997 PetscScalar *w = v; 6998 const PetscInt *oi = idx; 6999 PetscInt j; 7000 7001 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7002 } else { 7003 PetscCall(PetscArraycpy(v, vv, n)); 7004 } 7005 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7006 } 7007 PetscFunctionReturn(PETSC_SUCCESS); 7008 } 7009 7010 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7011 { 7012 MatMatMPIAIJBACKEND *mmdata; 7013 PetscInt i, n_d, n_o; 7014 7015 PetscFunctionBegin; 7016 MatCheckProduct(C, 1); 7017 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7018 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7019 if (!mmdata->reusesym) { /* update temporary matrices */ 7020 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7021 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7022 } 7023 mmdata->reusesym = PETSC_FALSE; 7024 7025 for (i = 0; i < mmdata->cp; i++) { 7026 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing 
numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7027 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7028 } 7029 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7030 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7031 7032 if (mmdata->mptmp[i]) continue; 7033 if (noff) { 7034 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7035 7036 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7037 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7038 n_o += noff; 7039 n_d += nown; 7040 } else { 7041 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7042 7043 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7044 n_d += mm->nz; 7045 } 7046 } 7047 if (mmdata->hasoffproc) { /* offprocess insertion */ 7048 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7049 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7050 } 7051 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7052 PetscFunctionReturn(PETSC_SUCCESS); 7053 } 7054 7055 /* Support for Pt * A, A * P, or Pt * A * P */ 7056 #define MAX_NUMBER_INTERMEDIATE 4 7057 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7058 { 7059 Mat_Product *product = C->product; 7060 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7061 Mat_MPIAIJ *a, *p; 7062 MatMatMPIAIJBACKEND *mmdata; 7063 ISLocalToGlobalMapping P_oth_l2g = NULL; 7064 IS glob = NULL; 7065 const char *prefix; 7066 char pprefix[256]; 7067 const PetscInt *globidx, *P_oth_idx; 7068 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7069 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7070 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7071 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7072 /* a base offset; type-2: sparse with a local to global map table */ 7073 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7074 7075 MatProductType ptype; 7076 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7077 PetscMPIInt size; 7078 7079 PetscFunctionBegin; 7080 MatCheckProduct(C, 1); 7081 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7082 ptype = product->type; 7083 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7084 ptype = MATPRODUCT_AB; 7085 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7086 } 7087 switch (ptype) { 7088 case MATPRODUCT_AB: 7089 A = product->A; 7090 P = product->B; 7091 m = A->rmap->n; 7092 n = P->cmap->n; 7093 M = A->rmap->N; 7094 N = P->cmap->N; 7095 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7096 break; 7097 case MATPRODUCT_AtB: 7098 P = product->A; 7099 A = product->B; 7100 m = P->cmap->n; 7101 n = A->cmap->n; 7102 M = P->cmap->N; 7103 N = A->cmap->N; 7104 hasoffproc = PETSC_TRUE; 7105 break; 7106 case MATPRODUCT_PtAP: 7107 A = product->A; 7108 P = product->B; 7109 m = P->cmap->n; 7110 n = P->cmap->n; 7111 M = P->cmap->N; 7112 N = P->cmap->N; 7113 hasoffproc = PETSC_TRUE; 7114 break; 7115 default: 7116 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7117 } 7118 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7119 if (size == 1) hasoffproc = PETSC_FALSE; 7120 7121 /* defaults */ 7122 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7123 mp[i] = NULL; 7124 mptmp[i] = PETSC_FALSE; 7125 rmapt[i] = -1; 7126 cmapt[i] = -1; 7127 rmapa[i] = NULL; 7128 cmapa[i] = NULL; 7129 } 7130 7131 /* customization */ 
7132 PetscCall(PetscNew(&mmdata)); 7133 mmdata->reusesym = product->api_user; 7134 if (ptype == MATPRODUCT_AB) { 7135 if (product->api_user) { 7136 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7137 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7138 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7139 PetscOptionsEnd(); 7140 } else { 7141 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7142 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7143 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7144 PetscOptionsEnd(); 7145 } 7146 } else if (ptype == MATPRODUCT_PtAP) { 7147 if (product->api_user) { 7148 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7149 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7150 PetscOptionsEnd(); 7151 } else { 7152 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7153 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7154 PetscOptionsEnd(); 7155 } 7156 } 7157 a = (Mat_MPIAIJ *)A->data; 7158 p = (Mat_MPIAIJ *)P->data; 7159 PetscCall(MatSetSizes(C, m, n, M, N)); 7160 PetscCall(PetscLayoutSetUp(C->rmap)); 7161 PetscCall(PetscLayoutSetUp(C->cmap)); 7162 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7163 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7164 7165 cp = 0; 7166 switch (ptype) { 7167 case MATPRODUCT_AB: /* A * P */ 7168 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7169 7170 /* A_diag * P_local (merged or not) */ 7171 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7172 /* P is product->B */ 7173 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7174 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7175 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7176 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7177 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7178 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7179 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7180 mp[cp]->product->api_user = product->api_user; 7181 PetscCall(MatProductSetFromOptions(mp[cp])); 7182 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7183 PetscCall(ISGetIndices(glob, &globidx)); 7184 rmapt[cp] = 1; 7185 cmapt[cp] = 2; 7186 cmapa[cp] = globidx; 7187 mptmp[cp] = PETSC_FALSE; 7188 cp++; 7189 } else { /* A_diag * P_diag and A_diag * P_off */ 7190 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7191 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7192 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7193 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7194 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7195 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7196 mp[cp]->product->api_user = product->api_user; 7197 PetscCall(MatProductSetFromOptions(mp[cp])); 7198 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7199 rmapt[cp] = 1; 7200 cmapt[cp] = 1; 7201 mptmp[cp] = PETSC_FALSE; 7202 cp++; 7203 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7204 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7205 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7206 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7207 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7208 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7209 mp[cp]->product->api_user = product->api_user; 7210 PetscCall(MatProductSetFromOptions(mp[cp])); 7211 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7212 rmapt[cp] = 1; 7213 cmapt[cp] = 2; 7214 cmapa[cp] = p->garray; 7215 mptmp[cp] = PETSC_FALSE; 7216 cp++; 7217 } 7218 7219 /* A_off * P_other */ 7220 if (mmdata->P_oth) { 7221 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7222 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7223 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7224 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7225 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7226 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7227 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7228 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7229 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7230 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7231 mp[cp]->product->api_user = product->api_user; 7232 PetscCall(MatProductSetFromOptions(mp[cp])); 7233 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7234 rmapt[cp] = 1; 7235 cmapt[cp] = 2; 7236 cmapa[cp] = P_oth_idx; 7237 mptmp[cp] = PETSC_FALSE; 7238 cp++; 7239 } 7240 break; 7241 7242 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7243 /* A is product->B */ 7244 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7245 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7246 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7247 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7248 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7249 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7250 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7251 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7252 mp[cp]->product->api_user = product->api_user; 7253 PetscCall(MatProductSetFromOptions(mp[cp])); 7254 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7255 PetscCall(ISGetIndices(glob, &globidx)); 7256 rmapt[cp] = 2; 7257 rmapa[cp] = globidx; 7258 cmapt[cp] = 2; 7259 cmapa[cp] = globidx; 7260 mptmp[cp] = PETSC_FALSE; 7261 cp++; 7262 } else { 7263 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7264 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7265 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7266 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7267 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7268 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7269 mp[cp]->product->api_user = product->api_user; 7270 PetscCall(MatProductSetFromOptions(mp[cp])); 7271 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7272 PetscCall(ISGetIndices(glob, &globidx)); 7273 rmapt[cp] = 1; 7274 cmapt[cp] = 2; 7275 cmapa[cp] = globidx; 7276 mptmp[cp] = PETSC_FALSE; 7277 cp++; 7278 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7279 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7280 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7281 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7282 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7283 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7284 mp[cp]->product->api_user = product->api_user; 7285 PetscCall(MatProductSetFromOptions(mp[cp])); 7286 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7287 rmapt[cp] = 2; 7288 rmapa[cp] = p->garray; 
7289 cmapt[cp] = 2; 7290 cmapa[cp] = globidx; 7291 mptmp[cp] = PETSC_FALSE; 7292 cp++; 7293 } 7294 break; 7295 case MATPRODUCT_PtAP: 7296 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7297 /* P is product->B */ 7298 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7299 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7300 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7301 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7302 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7303 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7304 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7305 mp[cp]->product->api_user = product->api_user; 7306 PetscCall(MatProductSetFromOptions(mp[cp])); 7307 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7308 PetscCall(ISGetIndices(glob, &globidx)); 7309 rmapt[cp] = 2; 7310 rmapa[cp] = globidx; 7311 cmapt[cp] = 2; 7312 cmapa[cp] = globidx; 7313 mptmp[cp] = PETSC_FALSE; 7314 cp++; 7315 if (mmdata->P_oth) { 7316 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7317 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7318 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7319 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7320 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7321 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7322 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7323 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7324 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7325 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7326 mp[cp]->product->api_user = product->api_user; 7327 PetscCall(MatProductSetFromOptions(mp[cp])); 7328 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7329 
mptmp[cp] = PETSC_TRUE; 7330 cp++; 7331 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7332 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7333 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7334 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7335 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7336 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7337 mp[cp]->product->api_user = product->api_user; 7338 PetscCall(MatProductSetFromOptions(mp[cp])); 7339 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7340 rmapt[cp] = 2; 7341 rmapa[cp] = globidx; 7342 cmapt[cp] = 2; 7343 cmapa[cp] = P_oth_idx; 7344 mptmp[cp] = PETSC_FALSE; 7345 cp++; 7346 } 7347 break; 7348 default: 7349 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7350 } 7351 /* sanity check */ 7352 if (size > 1) 7353 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7354 7355 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7356 for (i = 0; i < cp; i++) { 7357 mmdata->mp[i] = mp[i]; 7358 mmdata->mptmp[i] = mptmp[i]; 7359 } 7360 mmdata->cp = cp; 7361 C->product->data = mmdata; 7362 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7363 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7364 7365 /* memory type */ 7366 mmdata->mtype = PETSC_MEMTYPE_HOST; 7367 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7368 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7369 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7370 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7371 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7372 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7373 
7374 /* prepare coo coordinates for values insertion */ 7375 7376 /* count total nonzeros of those intermediate seqaij Mats 7377 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7378 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7379 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7380 */ 7381 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7382 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7383 if (mptmp[cp]) continue; 7384 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7385 const PetscInt *rmap = rmapa[cp]; 7386 const PetscInt mr = mp[cp]->rmap->n; 7387 const PetscInt rs = C->rmap->rstart; 7388 const PetscInt re = C->rmap->rend; 7389 const PetscInt *ii = mm->i; 7390 for (i = 0; i < mr; i++) { 7391 const PetscInt gr = rmap[i]; 7392 const PetscInt nz = ii[i + 1] - ii[i]; 7393 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7394 else ncoo_oown += nz; /* this row is local */ 7395 } 7396 } else ncoo_d += mm->nz; 7397 } 7398 7399 /* 7400 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7401 7402 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7403 7404 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7405 7406 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7407 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7408 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7409 7410 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7411 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7412 */ 7413 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7414 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7415 7416 /* gather (i,j) of nonzeros inserted by remote procs */ 7417 if (hasoffproc) { 7418 PetscSF msf; 7419 PetscInt ncoo2, *coo_i2, *coo_j2; 7420 7421 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7422 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7423 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7424 7425 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7426 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7427 PetscInt *idxoff = mmdata->off[cp]; 7428 PetscInt *idxown = mmdata->own[cp]; 7429 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7430 const PetscInt *rmap = rmapa[cp]; 7431 const PetscInt *cmap = cmapa[cp]; 7432 const PetscInt *ii = mm->i; 7433 PetscInt *coi = coo_i + ncoo_o; 7434 PetscInt *coj = coo_j + ncoo_o; 7435 const PetscInt mr = mp[cp]->rmap->n; 7436 const PetscInt rs = C->rmap->rstart; 7437 const PetscInt re = C->rmap->rend; 7438 const PetscInt cs = C->cmap->rstart; 7439 for (i = 0; i < mr; i++) { 7440 const PetscInt *jj = mm->j + ii[i]; 7441 const PetscInt gr = rmap[i]; 7442 const PetscInt nz = ii[i + 1] - ii[i]; 7443 if (gr < rs || gr >= re) { /* this is an offproc row */ 7444 for (j = ii[i]; j < ii[i + 1]; j++) { 7445 *coi++ = gr; 7446 *idxoff++ = j; 7447 } 7448 if (!cmapt[cp]) { /* already global */ 7449 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7450 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7451 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7452 } else { /* offdiag */ 7453 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7454 } 7455 ncoo_o += nz; 7456 } else { /* this is a local row */ 7457 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7458 } 7459 } 7460 } 7461 mmdata->off[cp + 1] = idxoff; 7462 mmdata->own[cp + 1] = idxown; 7463 } 7464 7465 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7466 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7467 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7468 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7469 ncoo = ncoo_d + ncoo_oown + ncoo2; 7470 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7471 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7472 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7473 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7474 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7475 PetscCall(PetscFree2(coo_i, coo_j)); 7476 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7477 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7478 coo_i = coo_i2; 7479 coo_j = coo_j2; 7480 } else { /* no offproc values insertion */ 7481 ncoo = ncoo_d; 7482 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7483 7484 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7485 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7486 PetscCall(PetscSFSetUp(mmdata->sf)); 7487 } 7488 mmdata->hasoffproc = hasoffproc; 7489 7490 /* gather (i,j) of nonzeros inserted locally */ 7491 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7492 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7493 PetscInt *coi = coo_i + ncoo_d; 7494 PetscInt *coj = coo_j + ncoo_d; 7495 const PetscInt *jj = mm->j; 7496 const PetscInt *ii = mm->i; 7497 const PetscInt *cmap = 
cmapa[cp]; 7498 const PetscInt *rmap = rmapa[cp]; 7499 const PetscInt mr = mp[cp]->rmap->n; 7500 const PetscInt rs = C->rmap->rstart; 7501 const PetscInt re = C->rmap->rend; 7502 const PetscInt cs = C->cmap->rstart; 7503 7504 if (mptmp[cp]) continue; 7505 if (rmapt[cp] == 1) { /* consecutive rows */ 7506 /* fill coo_i */ 7507 for (i = 0; i < mr; i++) { 7508 const PetscInt gr = i + rs; 7509 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7510 } 7511 /* fill coo_j */ 7512 if (!cmapt[cp]) { /* type-0, already global */ 7513 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7514 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7515 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7516 } else { /* type-2, local to global for sparse columns */ 7517 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7518 } 7519 ncoo_d += mm->nz; 7520 } else if (rmapt[cp] == 2) { /* sparse rows */ 7521 for (i = 0; i < mr; i++) { 7522 const PetscInt *jj = mm->j + ii[i]; 7523 const PetscInt gr = rmap[i]; 7524 const PetscInt nz = ii[i + 1] - ii[i]; 7525 if (gr >= rs && gr < re) { /* local rows */ 7526 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7527 if (!cmapt[cp]) { /* type-0, already global */ 7528 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7529 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7530 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7531 } else { /* type-2, local to global for sparse columns */ 7532 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7533 } 7534 ncoo_d += nz; 7535 } 7536 } 7537 } 7538 } 7539 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7540 PetscCall(ISDestroy(&glob)); 7541 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7542 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7543 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7544 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7545 7546 /* preallocate with COO data */ 7547 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7548 PetscCall(PetscFree2(coo_i, coo_j)); 7549 PetscFunctionReturn(PETSC_SUCCESS); 7550 } 7551 7552 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7553 { 7554 Mat_Product *product = mat->product; 7555 #if defined(PETSC_HAVE_DEVICE) 7556 PetscBool match = PETSC_FALSE; 7557 PetscBool usecpu = PETSC_FALSE; 7558 #else 7559 PetscBool match = PETSC_TRUE; 7560 #endif 7561 7562 PetscFunctionBegin; 7563 MatCheckProduct(mat, 1); 7564 #if defined(PETSC_HAVE_DEVICE) 7565 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7566 if (match) { /* we can always fallback to the CPU if requested */ 7567 switch (product->type) { 7568 case MATPRODUCT_AB: 7569 if (product->api_user) { 7570 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7571 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7572 PetscOptionsEnd(); 7573 } else { 7574 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7575 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7576 PetscOptionsEnd(); 7577 } 7578 break; 7579 case MATPRODUCT_AtB: 7580 if (product->api_user) { 7581 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7582 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7583 PetscOptionsEnd(); 7584 } else { 7585 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7586 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7587 PetscOptionsEnd(); 7588 } 7589 break; 7590 case MATPRODUCT_PtAP: 7591 if (product->api_user) { 7592 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7593 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7594 PetscOptionsEnd(); 7595 } else { 7596 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7597 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7598 PetscOptionsEnd(); 7599 } 7600 break; 7601 default: 7602 break; 7603 } 7604 match = (PetscBool)!usecpu; 7605 } 7606 #endif 7607 if (match) { 7608 switch (product->type) { 7609 case MATPRODUCT_AB: 7610 case MATPRODUCT_AtB: 7611 case MATPRODUCT_PtAP: 7612 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7613 break; 7614 default: 7615 break; 7616 } 7617 } 7618 /* fallback to MPIAIJ ops */ 7619 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7620 PetscFunctionReturn(PETSC_SUCCESS); 7621 } 7622 7623 /* 7624 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7625 7626 n - the number of block indices in cc[] 7627 cc - the block indices (must be large enough to contain the indices) 7628 */ 7629 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7630 { 7631 PetscInt cnt = -1, nidx, j; 7632 const PetscInt *idx; 7633 7634 PetscFunctionBegin; 7635 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7636 if (nidx) { 7637 cnt = 0; 7638 cc[cnt] = idx[0] / bs; 7639 for (j = 1; j < nidx; j++) { 7640 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7641 } 7642 } 7643 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7644 *n = cnt + 1; 7645 PetscFunctionReturn(PETSC_SUCCESS); 7646 } 7647 7648 /* 7649 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7650 7651 ncollapsed - the number of block indices 7652 collapsed - the block indices (must be large enough to contain the indices) 7653 */ 7654 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7655 { 7656 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7657 7658 PetscFunctionBegin; 7659 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7660 for (i = start + 1; i < start + bs; i++) { 7661 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7662 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7663 cprevtmp = cprev; 7664 cprev = merged; 7665 merged = cprevtmp; 7666 } 7667 *ncollapsed = nprev; 7668 if (collapsed) *collapsed = cprev; 7669 PetscFunctionReturn(PETSC_SUCCESS); 7670 } 7671 7672 /* 7673 This will eventually be folded into MatCreateGraph_AIJ() for optimal performance 7674 */ 7675 static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) 7676 { 7677 PetscInt Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; 7678 Mat tGmat; 7679 MPI_Comm comm; 7680 const PetscScalar *vals; 7681 const PetscInt *idx; 7682 PetscInt *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0; 7683 MatScalar *AA; // this is checked in graph 7684 PetscBool isseqaij; 7685 Mat a, b, c; 7686 MatType jtype; 7687 7688 PetscFunctionBegin; 7689 PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm)); 7690 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij)); 7691 PetscCall(MatGetType(Gmat, &jtype)); 7692 PetscCall(MatCreate(comm, &tGmat)); 7693 PetscCall(MatSetType(tGmat, jtype)); 7694 7695 /* TODO GPU: this can be called when filter = 0 -> Probably 
provide MatAIJThresholdCompress that compresses the entries below a threshold? 7696 Also, if the matrix is symmetric, can we skip this 7697 operation? It can be very expensive on large matrices. */ 7698 7699 // global sizes 7700 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7701 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7702 nloc = Iend - Istart; 7703 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7704 if (isseqaij) { 7705 a = Gmat; 7706 b = NULL; 7707 } else { 7708 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7709 a = d->A; 7710 b = d->B; 7711 garray = d->garray; 7712 } 7713 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7714 for (PetscInt row = 0; row < nloc; row++) { 7715 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7716 d_nnz[row] = ncols; 7717 if (ncols > maxcols) maxcols = ncols; 7718 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7719 } 7720 if (b) { 7721 for (PetscInt row = 0; row < nloc; row++) { 7722 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7723 o_nnz[row] = ncols; 7724 if (ncols > maxcols) maxcols = ncols; 7725 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7726 } 7727 } 7728 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7729 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7730 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7731 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7732 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7733 PetscCall(PetscFree2(d_nnz, o_nnz)); 7734 // 7735 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7736 nnz0 = nnz1 = 0; 7737 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7738 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7739 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7740 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7741 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7742 if (PetscRealPart(sv) > vfilter) { 7743 nnz1++; 7744 PetscInt cid = idx[jj] + Istart; 
//diag 7745 if (c != a) cid = garray[idx[jj]]; 7746 AA[ncol_row] = vals[jj]; 7747 AJ[ncol_row] = cid; 7748 ncol_row++; 7749 } 7750 } 7751 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7752 PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES)); 7753 } 7754 } 7755 PetscCall(PetscFree2(AA, AJ)); 7756 PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY)); 7757 PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY)); 7758 PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */ 7759 7760 PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols)); 7761 7762 *filteredG = tGmat; 7763 PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view")); 7764 PetscFunctionReturn(PETSC_SUCCESS); 7765 } 7766 7767 /* 7768 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7769 7770 Input Parameter: 7771 . Amat - matrix 7772 - symmetrize - make the result symmetric 7773 + scale - scale with diagonal 7774 7775 Output Parameter: 7776 . 
a_Gmat - output scalar graph >= 0 7777 7778 */ 7779 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7780 { 7781 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7782 MPI_Comm comm; 7783 Mat Gmat; 7784 PetscBool ismpiaij, isseqaij; 7785 Mat a, b, c; 7786 MatType jtype; 7787 7788 PetscFunctionBegin; 7789 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7790 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7791 PetscCall(MatGetSize(Amat, &MM, &NN)); 7792 PetscCall(MatGetBlockSize(Amat, &bs)); 7793 nloc = (Iend - Istart) / bs; 7794 7795 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7796 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7797 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7798 7799 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7800 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7801 implementation */ 7802 if (bs > 1) { 7803 PetscCall(MatGetType(Amat, &jtype)); 7804 PetscCall(MatCreate(comm, &Gmat)); 7805 PetscCall(MatSetType(Gmat, jtype)); 7806 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7807 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7808 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7809 PetscInt *d_nnz, *o_nnz; 7810 MatScalar *aa, val, *AA; 7811 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7812 if (isseqaij) { 7813 a = Amat; 7814 b = NULL; 7815 } else { 7816 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7817 a = d->A; 7818 b = d->B; 7819 } 7820 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7821 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7822 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7823 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7824 const PetscInt *cols1, *cols2; 7825 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7826 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7827 nnz[brow / bs] = nc2 / bs; 7828 if (nc2 % bs) ok = 0; 7829 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7830 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7831 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7832 if (nc1 != nc2) ok = 0; 7833 else { 7834 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7835 if (cols1[jj] != cols2[jj]) ok = 0; 7836 if (cols1[jj] % bs != jj % bs) ok = 0; 7837 } 7838 } 7839 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7840 } 7841 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7842 if (!ok) { 7843 PetscCall(PetscFree2(d_nnz, o_nnz)); 7844 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7845 goto old_bs; 7846 } 7847 } 7848 } 7849 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7850 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7851 PetscCall(PetscFree2(d_nnz, o_nnz)); 7852 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7853 // diag 7854 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7855 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7856 ai = aseq->i; 7857 n = ai[brow + 1] - ai[brow]; 7858 aj = aseq->j + ai[brow]; 7859 for (int k = 0; k < n; k += bs) { // block columns 7860 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7861 val = 0; 7862 for (int ii = 0; ii < bs; ii++) { // rows in block 7863 aa = aseq->a + ai[brow + ii] + k; 7864 for (int jj = 0; jj < bs; jj++) { // columns in block 7865 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7866 } 7867 } 7868 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7869 AA[k / bs] = val; 7870 } 7871 grow = Istart / bs + brow / bs; 7872 PetscCall(MatSetValues(Gmat, 1, 
&grow, n / bs, AJ, AA, INSERT_VALUES)); 7873 } 7874 // off-diag 7875 if (ismpiaij) { 7876 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7877 const PetscScalar *vals; 7878 const PetscInt *cols, *garray = aij->garray; 7879 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7880 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7881 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7882 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7883 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7884 AA[k / bs] = 0; 7885 AJ[cidx] = garray[cols[k]] / bs; 7886 } 7887 nc = ncols / bs; 7888 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7889 for (int ii = 0; ii < bs; ii++) { // rows in block 7890 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7891 for (int k = 0; k < ncols; k += bs) { 7892 for (int jj = 0; jj < bs; jj++) { // cols in block 7893 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7894 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7895 } 7896 } 7897 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7898 } 7899 grow = Istart / bs + brow / bs; 7900 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7901 } 7902 } 7903 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7904 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7905 PetscCall(PetscFree2(AA, AJ)); 7906 } else { 7907 const PetscScalar *vals; 7908 const PetscInt *idx; 7909 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7910 old_bs: 7911 /* 7912 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7913 */ 7914 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7915 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7916 if (isseqaij) { 7917 PetscInt max_d_nnz; 7918 /* 7919 Determine exact preallocation count for (sequential) scalar matrix 7920 */ 7921 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7922 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7923 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7924 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7925 PetscCall(PetscFree3(w0, w1, w2)); 7926 } else if (ismpiaij) { 7927 Mat Daij, Oaij; 7928 const PetscInt *garray; 7929 PetscInt max_d_nnz; 7930 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7931 /* 7932 Determine exact preallocation count for diagonal block portion of scalar matrix 7933 */ 7934 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7935 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7936 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7937 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7938 PetscCall(PetscFree3(w0, w1, w2)); 7939 /* 7940 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7941 */ 7942 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7943 o_nnz[jj] = 0; 7944 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7945 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7946 o_nnz[jj] += ncols; 7947 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7948 } 7949 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7950 } 7951 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7952 /* get scalar copy (norms) of matrix */ 7953 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7954 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7955 PetscCall(PetscFree2(d_nnz, o_nnz)); 7956 for (Ii = Istart; Ii < Iend; Ii++) { 7957 
PetscInt dest_row = Ii / bs; 7958 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7959 for (jj = 0; jj < ncols; jj++) { 7960 PetscInt dest_col = idx[jj] / bs; 7961 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7962 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7963 } 7964 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7965 } 7966 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7967 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7968 } 7969 } else { 7970 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7971 else { 7972 Gmat = Amat; 7973 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7974 } 7975 if (isseqaij) { 7976 a = Gmat; 7977 b = NULL; 7978 } else { 7979 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7980 a = d->A; 7981 b = d->B; 7982 } 7983 if (filter >= 0 || scale) { 7984 /* take absolute value of each entry */ 7985 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7986 MatInfo info; 7987 PetscScalar *avals; 7988 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7989 PetscCall(MatSeqAIJGetArray(c, &avals)); 7990 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7991 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7992 } 7993 } 7994 } 7995 if (symmetrize) { 7996 PetscBool isset, issym; 7997 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7998 if (!isset || !issym) { 7999 Mat matTrans; 8000 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8001 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8002 PetscCall(MatDestroy(&matTrans)); 8003 } 8004 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8005 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8006 if (scale) { 8007 /* scale c for all diagonal values = 1 or -1 */ 8008 Vec diag; 8009 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8010 PetscCall(MatGetDiagonal(Gmat, diag)); 8011 PetscCall(VecReciprocal(diag)); 8012 PetscCall(VecSqrtAbs(diag)); 8013 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8014 PetscCall(VecDestroy(&diag)); 8015 } 8016 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8017 8018 if (filter >= 0) { 8019 Mat Fmat = NULL; /* some silly compiler needs this */ 8020 8021 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8022 PetscCall(MatDestroy(&Gmat)); 8023 Gmat = Fmat; 8024 } 8025 *a_Gmat = Gmat; 8026 PetscFunctionReturn(PETSC_SUCCESS); 8027 } 8028 8029 /* 8030 Special version for direct calls from Fortran 8031 */ 8032 #include <petsc/private/fortranimpl.h> 8033 8034 /* Change these macros so can be used in void function */ 8035 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8036 #undef PetscCall 8037 #define PetscCall(...) \ 8038 do { \ 8039 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8040 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8041 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8042 return; \ 8043 } \ 8044 } while (0) 8045 8046 #undef SETERRQ 8047 #define SETERRQ(comm, ierr, ...) 
\ 8048 do { \ 8049 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8050 return; \ 8051 } while (0) 8052 8053 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8054 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8055 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8056 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8057 #else 8058 #endif 8059 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8060 { 8061 Mat mat = *mmat; 8062 PetscInt m = *mm, n = *mn; 8063 InsertMode addv = *maddv; 8064 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8065 PetscScalar value; 8066 8067 MatCheckPreallocated(mat, 1); 8068 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8069 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8070 { 8071 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8072 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8073 PetscBool roworiented = aij->roworiented; 8074 8075 /* Some Variables required in the macro */ 8076 Mat A = aij->A; 8077 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8078 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8079 MatScalar *aa; 8080 PetscBool ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8081 Mat B = aij->B; 8082 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8083 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8084 MatScalar *ba; 8085 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8086 * cannot use "#if defined" inside a macro. 
*/ 8087 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8088 8089 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8090 PetscInt nonew = a->nonew; 8091 MatScalar *ap1, *ap2; 8092 8093 PetscFunctionBegin; 8094 PetscCall(MatSeqAIJGetArray(A, &aa)); 8095 PetscCall(MatSeqAIJGetArray(B, &ba)); 8096 for (i = 0; i < m; i++) { 8097 if (im[i] < 0) continue; 8098 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8099 if (im[i] >= rstart && im[i] < rend) { 8100 row = im[i] - rstart; 8101 lastcol1 = -1; 8102 rp1 = aj + ai[row]; 8103 ap1 = aa + ai[row]; 8104 rmax1 = aimax[row]; 8105 nrow1 = ailen[row]; 8106 low1 = 0; 8107 high1 = nrow1; 8108 lastcol2 = -1; 8109 rp2 = bj + bi[row]; 8110 ap2 = ba + bi[row]; 8111 rmax2 = bimax[row]; 8112 nrow2 = bilen[row]; 8113 low2 = 0; 8114 high2 = nrow2; 8115 8116 for (j = 0; j < n; j++) { 8117 if (roworiented) value = v[i * n + j]; 8118 else value = v[i + j * m]; 8119 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8120 if (in[j] >= cstart && in[j] < cend) { 8121 col = in[j] - cstart; 8122 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8123 } else if (in[j] < 0) continue; 8124 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8125 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8126 } else { 8127 if (mat->was_assembled) { 8128 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8129 #if defined(PETSC_USE_CTABLE) 8130 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8131 col--; 8132 #else 8133 col = aij->colmap[in[j]] - 1; 8134 #endif 8135 if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) { 8136 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8137 col = in[j]; 8138 /* Reinitialize the 
variables required by MatSetValues_SeqAIJ_B_Private() */ 8139 B = aij->B; 8140 b = (Mat_SeqAIJ *)B->data; 8141 bimax = b->imax; 8142 bi = b->i; 8143 bilen = b->ilen; 8144 bj = b->j; 8145 rp2 = bj + bi[row]; 8146 ap2 = ba + bi[row]; 8147 rmax2 = bimax[row]; 8148 nrow2 = bilen[row]; 8149 low2 = 0; 8150 high2 = nrow2; 8151 bm = aij->B->rmap->n; 8152 ba = b->a; 8153 inserted = PETSC_FALSE; 8154 } 8155 } else col = in[j]; 8156 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8157 } 8158 } 8159 } else if (!aij->donotstash) { 8160 if (roworiented) { 8161 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8162 } else { 8163 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8164 } 8165 } 8166 } 8167 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8168 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8169 } 8170 PetscFunctionReturnVoid(); 8171 } 8172 8173 /* Undefining these here since they were redefined from their original definition above! No 8174 * other PETSc functions should be defined past this point, as it is impossible to recover the 8175 * original definitions */ 8176 #undef PetscCall 8177 #undef SETERRQ 8178