1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 114 115 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Developer Note: 125 Level: beginner 126 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 
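  /* rows[] now holds the global indices of every locally owned row that contains at least one
     explicitly stored nonzero value (in either the diagonal block A or the off-diagonal block B),
     and cnt is their number; create an index set that takes ownership of the array */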
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 PetscMPIInt in; 288 289 PetscFunctionBegin; 290 PetscCall(MatGetSize(A, &m, &n)); 291 PetscCall(PetscCalloc1(n, &work)); 292 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 294 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 295 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 296 if (type == NORM_2) { 297 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 298 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 299 } else if (type == NORM_1) { 300 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 301 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 302 } else if (type == NORM_INFINITY) { 303 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 304 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 305 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 306 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 307 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 308 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 309 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 310 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 311 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown 
reduction type"); 312 PetscCall(PetscMPIIntCast(n, &in)); 313 if (type == NORM_INFINITY) { 314 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 315 } else { 316 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 317 } 318 PetscCall(PetscFree(work)); 319 if (type == NORM_2) { 320 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 321 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 322 for (i = 0; i < n; i++) reductions[i] /= m; 323 } 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 328 { 329 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 330 IS sis, gis; 331 const PetscInt *isis, *igis; 332 PetscInt n, *iis, nsis, ngis, rstart, i; 333 334 PetscFunctionBegin; 335 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 336 PetscCall(MatFindNonzeroRows(a->B, &gis)); 337 PetscCall(ISGetSize(gis, &ngis)); 338 PetscCall(ISGetSize(sis, &nsis)); 339 PetscCall(ISGetIndices(sis, &isis)); 340 PetscCall(ISGetIndices(gis, &igis)); 341 342 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 343 PetscCall(PetscArraycpy(iis, igis, ngis)); 344 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 345 n = ngis + nsis; 346 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 348 for (i = 0; i < n; i++) iis[i] += rstart; 349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 350 351 PetscCall(ISRestoreIndices(sis, &isis)); 352 PetscCall(ISRestoreIndices(gis, &igis)); 353 PetscCall(ISDestroy(&sis)); 354 PetscCall(ISDestroy(&gis)); 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 /* 359 Local utility routine that creates a mapping from the global column 360 number to the local number in the off-diagonal part of the local 361 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 362 a slightly higher hash table cost; without it it is not scalable (each processor 363 has an order N integer array but is fast to access. 
364 */ 365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 366 { 367 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 368 PetscInt n = aij->B->cmap->n, i; 369 370 PetscFunctionBegin; 371 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 372 #if defined(PETSC_USE_CTABLE) 373 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 374 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 375 #else 376 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 377 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 378 #endif 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 383 do { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col; \ 387 while (high1 - low1 > 5) { \ 388 t = (low1 + high1) / 2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i = low1; _i < high1; _i++) { \ 393 if (rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) { \ 396 ap1[_i] += value; \ 397 /* Not sure LogFlops will slow dow the code or not */ \ 398 (void)PetscLogFlops(1.0); \ 399 } else ap1[_i] = value; \ 400 goto a_noinsert; \ 401 } \ 402 } \ 403 if (value == 0.0 && ignorezeroentries && row != col) { \ 404 low1 = 0; \ 405 high1 = nrow1; \ 406 goto a_noinsert; \ 407 } \ 408 if (nonew == 1) { \ 409 low1 = 0; \ 410 high1 = nrow1; \ 411 goto a_noinsert; \ 412 } \ 413 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 414 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 415 N = nrow1++ - 1; \ 416 a->nz++; \ 417 high1++; \ 418 /* shift up all the later entries in this row */ \ 419 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 420 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 421 rp1[_i] = col; \ 422 ap1[_i] = value; \ 423 a_noinsert:; \ 424 ailen[row] = nrow1; \ 425 } while (0) 426 427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 428 do { \ 429 if (col <= lastcol2) low2 = 0; \ 430 else high2 = nrow2; \ 431 lastcol2 = col; \ 432 while (high2 - low2 > 5) { \ 433 t = (low2 + high2) / 2; \ 434 if (rp2[t] > col) high2 = t; \ 435 else low2 = t; \ 436 } \ 437 for (_i = low2; _i < high2; _i++) { \ 438 if (rp2[_i] > col) break; \ 439 if (rp2[_i] == col) { \ 440 if (addv == ADD_VALUES) { \ 441 ap2[_i] += value; \ 442 (void)PetscLogFlops(1.0); \ 443 } else ap2[_i] = value; \ 444 goto b_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries) { \ 448 low2 = 0; \ 449 high2 = nrow2; \ 450 goto b_noinsert; \ 451 } \ 452 if (nonew == 1) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 458 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 459 N = nrow2++ - 1; \ 460 b->nz++; \ 461 high2++; \ 462 /* shift up all the later entries in this row */ \ 463 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 464 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 465 rp2[_i] = col; \ 466 ap2[_i] = value; \ 467 b_noinsert:; 
\ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 584 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
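   As a purely illustrative example: with ownership range cstart = 4, cend = 8 and a local row whose
   sorted global columns are {1, 4, 6, 9}, the loop below stores columns 4 and 6 in the diagonal
   block A as local columns 0 and 2 (global index minus cstart) and keeps columns 1 and 9 in the
   off-diagonal block B with their global indices, giving ailen[row] = 2 and bilen[row] = 2.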
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. */ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 
PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 
const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if 
pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
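   Each process extracts its owned rows restricted to the columns it does not own, A(me, notme),
   and the corresponding block B(notme, me) of the candidate transpose as sequential matrices;
   checking that these two blocks are transposes of each other, combined with the diagonal-block
   test above, covers every entry of the two matrices.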
 */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) {
    if
(hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - 
Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 
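/* mirror the orientation flag onto both sequential blocks so that MatSetValues() on A and B interprets dense value arrays the same way as the parallel matrix does */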
PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1690 break; 1691 case MAT_SUBMAT_SINGLEIS: 1692 A->submat_singleis = flg; 1693 break; 1694 case MAT_STRUCTURE_ONLY: 1695 /* The option is handled directly by MatSetOption() */ 1696 break; 1697 default: 1698 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1699 } 1700 PetscFunctionReturn(PETSC_SUCCESS); 1701 } 1702 1703 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1704 { 1705 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1706 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1707 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1708 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1709 PetscInt *cmap, *idx_p; 1710 1711 PetscFunctionBegin; 1712 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1713 mat->getrowactive = PETSC_TRUE; 1714 1715 if (!mat->rowvalues && (idx || v)) { 1716 /* 1717 allocate enough space to hold information from the longest row. 
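The work arrays mat->rowvalues and mat->rowindices are reused by every later MatGetRow() call on this matrix, so they are sized once to the largest combined row length of the diagonal (A) and off-diagonal (B) blocks.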
1718 */ 1719 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1720 PetscInt max = 1, tmp; 1721 for (i = 0; i < matin->rmap->n; i++) { 1722 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1723 if (max < tmp) max = tmp; 1724 } 1725 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1726 } 1727 1728 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1729 lrow = row - rstart; 1730 1731 pvA = &vworkA; 1732 pcA = &cworkA; 1733 pvB = &vworkB; 1734 pcB = &cworkB; 1735 if (!v) { 1736 pvA = NULL; 1737 pvB = NULL; 1738 } 1739 if (!idx) { 1740 pcA = NULL; 1741 if (!v) pcB = NULL; 1742 } 1743 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1744 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1745 nztot = nzA + nzB; 1746 1747 cmap = mat->garray; 1748 if (v || idx) { 1749 if (nztot) { 1750 /* Sort by increasing column numbers, assuming A and B already sorted */ 1751 PetscInt imark = -1; 1752 if (v) { 1753 *v = v_p = mat->rowvalues; 1754 for (i = 0; i < nzB; i++) { 1755 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1756 else break; 1757 } 1758 imark = i; 1759 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1760 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1761 } 1762 if (idx) { 1763 *idx = idx_p = mat->rowindices; 1764 if (imark > -1) { 1765 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1766 } else { 1767 for (i = 0; i < nzB; i++) { 1768 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1769 else break; 1770 } 1771 imark = i; 1772 } 1773 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1774 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1775 } 1776 } else { 1777 if (idx) *idx = NULL; 1778 if (v) *v = NULL; 1779 } 1780 } 1781 *nz = nztot; 1782 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1783 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1784 PetscFunctionReturn(PETSC_SUCCESS); 1785 } 1786 1787 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1788 { 1789 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1790 1791 PetscFunctionBegin; 1792 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1793 aij->getrowactive = PETSC_FALSE; 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1801 PetscInt i, j, cstart = mat->cmap->rstart; 1802 PetscReal sum = 0.0; 1803 const MatScalar *v, *amata, *bmata; 1804 PetscMPIInt iN; 1805 1806 PetscFunctionBegin; 1807 if (aij->size == 1) { 1808 PetscCall(MatNorm(aij->A, type, norm)); 1809 } else { 1810 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1811 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1812 if (type == NORM_FROBENIUS) { 1813 v = amata; 1814 for (i = 0; i < amat->nz; i++) { 1815 sum += PetscRealPart(PetscConj(*v) * (*v)); 1816 v++; 1817 } 1818 v = bmata; 1819 for (i = 0; i < bmat->nz; i++) { 1820 sum += PetscRealPart(PetscConj(*v) * (*v)); 1821 v++; 1822 } 1823 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1824 *norm = PetscSqrtReal(*norm); 1825 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1826 } else if 
(type == NORM_1) { /* max column norm */ 1827 PetscReal *tmp, *tmp2; 1828 PetscInt *jj, *garray = aij->garray; 1829 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1830 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1831 *norm = 0.0; 1832 v = amata; 1833 jj = amat->j; 1834 for (j = 0; j < amat->nz; j++) { 1835 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1836 v++; 1837 } 1838 v = bmata; 1839 jj = bmat->j; 1840 for (j = 0; j < bmat->nz; j++) { 1841 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1842 v++; 1843 } 1844 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1845 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1846 for (j = 0; j < mat->cmap->N; j++) { 1847 if (tmp2[j] > *norm) *norm = tmp2[j]; 1848 } 1849 PetscCall(PetscFree(tmp)); 1850 PetscCall(PetscFree(tmp2)); 1851 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1852 } else if (type == NORM_INFINITY) { /* max row norm */ 1853 PetscReal ntemp = 0.0; 1854 for (j = 0; j < aij->A->rmap->n; j++) { 1855 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1856 sum = 0.0; 1857 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); 1859 v++; 1860 } 1861 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1862 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1863 sum += PetscAbsScalar(*v); 1864 v++; 1865 } 1866 if (sum > ntemp) ntemp = sum; 1867 } 1868 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1870 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1872 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1873 } 1874 PetscFunctionReturn(PETSC_SUCCESS); 1875 } 1876 1877 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1878 { 1879 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1880 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1881 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1882 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1883 Mat B, A_diag, *B_diag; 1884 const MatScalar *pbv, *bv; 1885 1886 PetscFunctionBegin; 1887 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1888 ma = A->rmap->n; 1889 na = A->cmap->n; 1890 mb = a->B->rmap->n; 1891 nb = a->B->cmap->n; 1892 ai = Aloc->i; 1893 aj = Aloc->j; 1894 bi = Bloc->i; 1895 bj = Bloc->j; 1896 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1897 PetscInt *d_nnz, *g_nnz, *o_nnz; 1898 PetscSFNode *oloc; 1899 PETSC_UNUSED PetscSF sf; 1900 1901 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1902 /* compute d_nnz for preallocation */ 1903 PetscCall(PetscArrayzero(d_nnz, na)); 1904 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1905 /* compute local off-diagonal contributions */ 1906 PetscCall(PetscArrayzero(g_nnz, nb)); 1907 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1908 /* map those to global */ 1909 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1910 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1911 PetscCall(PetscSFSetFromOptions(sf)); 1912 PetscCall(PetscArrayzero(o_nnz, na)); 1913 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, 
MPI_SUM)); 1915 PetscCall(PetscSFDestroy(&sf)); 1916 1917 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1918 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1919 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1920 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1921 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1922 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1923 } else { 1924 B = *matout; 1925 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1926 } 1927 1928 b = (Mat_MPIAIJ *)B->data; 1929 A_diag = a->A; 1930 B_diag = &b->A; 1931 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1932 A_diag_ncol = A_diag->cmap->N; 1933 B_diag_ilen = sub_B_diag->ilen; 1934 B_diag_i = sub_B_diag->i; 1935 1936 /* Set ilen for diagonal of B */ 1937 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1938 1939 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1940 very quickly (=without using MatSetValues), because all writes are local. */ 1941 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1942 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1943 1944 /* copy over the B part */ 1945 PetscCall(PetscMalloc1(bi[mb], &cols)); 1946 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1947 pbv = bv; 1948 row = A->rmap->rstart; 1949 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1950 cols_tmp = cols; 1951 for (i = 0; i < mb; i++) { 1952 ncol = bi[i + 1] - bi[i]; 1953 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1954 row++; 1955 if (pbv) pbv += ncol; 1956 if (cols_tmp) cols_tmp += ncol; 1957 } 1958 PetscCall(PetscFree(cols)); 1959 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1960 1961 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1962 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1963 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1964 *matout = B; 1965 } else { 1966 PetscCall(MatHeaderMerge(A, &B)); 1967 } 1968 PetscFunctionReturn(PETSC_SUCCESS); 1969 } 1970 1971 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1972 { 1973 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1974 Mat a = aij->A, b = aij->B; 1975 PetscInt s1, s2, s3; 1976 1977 PetscFunctionBegin; 1978 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1979 if (rr) { 1980 PetscCall(VecGetLocalSize(rr, &s1)); 1981 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1982 /* Overlap communication with computation. 
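Begin scattering the right-scaling vector rr into the ghosted work vector lvec now, do the purely local scalings while that communication is in flight, and complete the scatter only when the off-diagonal block needs the ghosted values below.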
*/ 1983 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1984 } 1985 if (ll) { 1986 PetscCall(VecGetLocalSize(ll, &s1)); 1987 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1988 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1989 } 1990 /* scale the diagonal block */ 1991 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1992 1993 if (rr) { 1994 /* Do a scatter end and then right scale the off-diagonal block */ 1995 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1996 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1997 } 1998 PetscFunctionReturn(PETSC_SUCCESS); 1999 } 2000 2001 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2002 { 2003 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2004 2005 PetscFunctionBegin; 2006 PetscCall(MatSetUnfactored(a->A)); 2007 PetscFunctionReturn(PETSC_SUCCESS); 2008 } 2009 2010 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2011 { 2012 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2013 Mat a, b, c, d; 2014 PetscBool flg; 2015 2016 PetscFunctionBegin; 2017 a = matA->A; 2018 b = matA->B; 2019 c = matB->A; 2020 d = matB->B; 2021 2022 PetscCall(MatEqual(a, c, &flg)); 2023 if (flg) PetscCall(MatEqual(b, d, &flg)); 2024 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2029 { 2030 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2031 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2032 2033 PetscFunctionBegin; 2034 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2035 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2036 /* because of the column compression in the off-processor part of the matrix a->B, 2037 the number of columns in a->B and b->B may be different, hence we cannot call 2038 the MatCopy() directly on the two parts. If need be, we can provide a more 2039 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2040 then copying the submatrices */ 2041 PetscCall(MatCopy_Basic(A, B, str)); 2042 } else { 2043 PetscCall(MatCopy(a->A, b->A, str)); 2044 PetscCall(MatCopy(a->B, b->B, str)); 2045 } 2046 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2047 PetscFunctionReturn(PETSC_SUCCESS); 2048 } 2049 2050 /* 2051 Computes the number of nonzeros per row needed for preallocation when X and Y 2052 have different nonzero structure. 
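For each row the count is the size of the union of the two sorted global column lists: the loops below walk X's and Y's indices in merge order, counting a column that appears in both lists only once. For a hypothetical row with X columns {0, 3, 7} and Y columns {3, 5}, the union is {0, 3, 5, 7}, so nnz for that row is 4.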
2053 */ 2054 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2055 { 2056 PetscInt i, j, k, nzx, nzy; 2057 2058 PetscFunctionBegin; 2059 /* Set the number of nonzeros in the new matrix */ 2060 for (i = 0; i < m; i++) { 2061 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2062 nzx = xi[i + 1] - xi[i]; 2063 nzy = yi[i + 1] - yi[i]; 2064 nnz[i] = 0; 2065 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2066 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2067 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2068 nnz[i]++; 2069 } 2070 for (; k < nzy; k++) nnz[i]++; 2071 } 2072 PetscFunctionReturn(PETSC_SUCCESS); 2073 } 2074 2075 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2076 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2077 { 2078 PetscInt m = Y->rmap->N; 2079 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2080 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2081 2082 PetscFunctionBegin; 2083 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2084 PetscFunctionReturn(PETSC_SUCCESS); 2085 } 2086 2087 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2088 { 2089 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2090 2091 PetscFunctionBegin; 2092 if (str == SAME_NONZERO_PATTERN) { 2093 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2094 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2095 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2096 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2097 } else { 2098 Mat B; 2099 PetscInt *nnz_d, *nnz_o; 2100 2101 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2102 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2103 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2104 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2105 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2106 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2107 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2108 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2109 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2110 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2111 PetscCall(MatHeaderMerge(Y, &B)); 2112 PetscCall(PetscFree(nnz_d)); 2113 PetscCall(PetscFree(nnz_o)); 2114 } 2115 PetscFunctionReturn(PETSC_SUCCESS); 2116 } 2117 2118 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2119 2120 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2121 { 2122 PetscFunctionBegin; 2123 if (PetscDefined(USE_COMPLEX)) { 2124 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2125 2126 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2127 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2128 } 2129 PetscFunctionReturn(PETSC_SUCCESS); 2130 } 2131 2132 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2133 { 2134 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2135 2136 PetscFunctionBegin; 2137 PetscCall(MatRealPart(a->A)); 2138 PetscCall(MatRealPart(a->B)); 2139 PetscFunctionReturn(PETSC_SUCCESS); 2140 } 2141 2142 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2143 { 2144 Mat_MPIAIJ *a = (Mat_MPIAIJ 
*)A->data; 2145 2146 PetscFunctionBegin; 2147 PetscCall(MatImaginaryPart(a->A)); 2148 PetscCall(MatImaginaryPart(a->B)); 2149 PetscFunctionReturn(PETSC_SUCCESS); 2150 } 2151 2152 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2153 { 2154 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2155 PetscInt i, *idxb = NULL, m = A->rmap->n; 2156 PetscScalar *vv; 2157 Vec vB, vA; 2158 const PetscScalar *va, *vb; 2159 2160 PetscFunctionBegin; 2161 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2162 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2163 2164 PetscCall(VecGetArrayRead(vA, &va)); 2165 if (idx) { 2166 for (i = 0; i < m; i++) { 2167 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2168 } 2169 } 2170 2171 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2172 PetscCall(PetscMalloc1(m, &idxb)); 2173 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2174 2175 PetscCall(VecGetArrayWrite(v, &vv)); 2176 PetscCall(VecGetArrayRead(vB, &vb)); 2177 for (i = 0; i < m; i++) { 2178 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2179 vv[i] = vb[i]; 2180 if (idx) idx[i] = a->garray[idxb[i]]; 2181 } else { 2182 vv[i] = va[i]; 2183 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2184 } 2185 } 2186 PetscCall(VecRestoreArrayWrite(v, &vv)); 2187 PetscCall(VecRestoreArrayRead(vA, &va)); 2188 PetscCall(VecRestoreArrayRead(vB, &vb)); 2189 PetscCall(PetscFree(idxb)); 2190 PetscCall(VecDestroy(&vA)); 2191 PetscCall(VecDestroy(&vB)); 2192 PetscFunctionReturn(PETSC_SUCCESS); 2193 } 2194 2195 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2196 { 2197 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2198 Vec vB, vA; 2199 2200 PetscFunctionBegin; 2201 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2202 PetscCall(MatGetRowSumAbs(a->A, vA)); 2203 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2204 PetscCall(MatGetRowSumAbs(a->B, vB)); 2205 PetscCall(VecAXPY(vA, 1.0, vB)); 2206 PetscCall(VecDestroy(&vB)); 2207 PetscCall(VecCopy(vA, v)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscFunctionReturn(PETSC_SUCCESS); 2210 } 2211 2212 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2213 { 2214 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2215 PetscInt m = A->rmap->n, n = A->cmap->n; 2216 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2217 PetscInt *cmap = mat->garray; 2218 PetscInt *diagIdx, *offdiagIdx; 2219 Vec diagV, offdiagV; 2220 PetscScalar *a, *diagA, *offdiagA; 2221 const PetscScalar *ba, *bav; 2222 PetscInt r, j, col, ncols, *bi, *bj; 2223 Mat B = mat->B; 2224 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2225 2226 PetscFunctionBegin; 2227 /* When a process holds entire A and other processes have no entry */ 2228 if (A->cmap->N == n) { 2229 PetscCall(VecGetArrayWrite(v, &diagA)); 2230 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2231 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2232 PetscCall(VecDestroy(&diagV)); 2233 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2234 PetscFunctionReturn(PETSC_SUCCESS); 2235 } else if (n == 0) { 2236 if (m) { 2237 PetscCall(VecGetArrayWrite(v, &a)); 2238 for (r = 0; r < m; r++) { 2239 a[r] = 0.0; 2240 if (idx) idx[r] = -1; 2241 } 2242 PetscCall(VecRestoreArrayWrite(v, &a)); 2243 } 2244 PetscFunctionReturn(PETSC_SUCCESS); 2245 } 2246 2247 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2249 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2250 PetscCall(MatGetRowMinAbs(mat->A, 
diagV, diagIdx)); 2251 2252 /* Get offdiagIdx[] for implicit 0.0 */ 2253 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2254 ba = bav; 2255 bi = b->i; 2256 bj = b->j; 2257 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2258 for (r = 0; r < m; r++) { 2259 ncols = bi[r + 1] - bi[r]; 2260 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2261 offdiagA[r] = *ba; 2262 offdiagIdx[r] = cmap[0]; 2263 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2264 offdiagA[r] = 0.0; 2265 2266 /* Find first hole in the cmap */ 2267 for (j = 0; j < ncols; j++) { 2268 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2269 if (col > j && j < cstart) { 2270 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2271 break; 2272 } else if (col > j + n && j >= cstart) { 2273 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2274 break; 2275 } 2276 } 2277 if (j == ncols && ncols < A->cmap->N - n) { 2278 /* a hole is outside compressed Bcols */ 2279 if (ncols == 0) { 2280 if (cstart) { 2281 offdiagIdx[r] = 0; 2282 } else offdiagIdx[r] = cend; 2283 } else { /* ncols > 0 */ 2284 offdiagIdx[r] = cmap[ncols - 1] + 1; 2285 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2286 } 2287 } 2288 } 2289 2290 for (j = 0; j < ncols; j++) { 2291 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2292 offdiagA[r] = *ba; 2293 offdiagIdx[r] = cmap[*bj]; 2294 } 2295 ba++; 2296 bj++; 2297 } 2298 } 2299 2300 PetscCall(VecGetArrayWrite(v, &a)); 2301 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2302 for (r = 0; r < m; ++r) { 2303 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2304 a[r] = diagA[r]; 2305 if (idx) idx[r] = cstart + diagIdx[r]; 2306 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2307 a[r] = diagA[r]; 2308 if (idx) { 2309 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2310 idx[r] = cstart + diagIdx[r]; 2311 } else idx[r] = offdiagIdx[r]; 2312 } 2313 } else { 2314 a[r] = offdiagA[r]; 2315 if (idx) idx[r] = offdiagIdx[r]; 2316 } 2317 } 2318 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2319 PetscCall(VecRestoreArrayWrite(v, &a)); 2320 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2321 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2322 PetscCall(VecDestroy(&diagV)); 2323 PetscCall(VecDestroy(&offdiagV)); 2324 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2325 PetscFunctionReturn(PETSC_SUCCESS); 2326 } 2327 2328 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2329 { 2330 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2331 PetscInt m = A->rmap->n, n = A->cmap->n; 2332 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2333 PetscInt *cmap = mat->garray; 2334 PetscInt *diagIdx, *offdiagIdx; 2335 Vec diagV, offdiagV; 2336 PetscScalar *a, *diagA, *offdiagA; 2337 const PetscScalar *ba, *bav; 2338 PetscInt r, j, col, ncols, *bi, *bj; 2339 Mat B = mat->B; 2340 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2341 2342 PetscFunctionBegin; 2343 /* When a process holds entire A and other processes have no entry */ 2344 if (A->cmap->N == n) { 2345 PetscCall(VecGetArrayWrite(v, &diagA)); 2346 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2347 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2348 PetscCall(VecDestroy(&diagV)); 2349 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2350 PetscFunctionReturn(PETSC_SUCCESS); 2351 } else if (n == 0) { 2352 if (m) { 2353 PetscCall(VecGetArrayWrite(v, &a)); 2354 for (r = 0; r < m; r++) { 2355 a[r] = 
PETSC_MAX_REAL; 2356 if (idx) idx[r] = -1; 2357 } 2358 PetscCall(VecRestoreArrayWrite(v, &a)); 2359 } 2360 PetscFunctionReturn(PETSC_SUCCESS); 2361 } 2362 2363 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2364 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2365 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2366 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2367 2368 /* Get offdiagIdx[] for implicit 0.0 */ 2369 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2370 ba = bav; 2371 bi = b->i; 2372 bj = b->j; 2373 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2374 for (r = 0; r < m; r++) { 2375 ncols = bi[r + 1] - bi[r]; 2376 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2377 offdiagA[r] = *ba; 2378 offdiagIdx[r] = cmap[0]; 2379 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2380 offdiagA[r] = 0.0; 2381 2382 /* Find first hole in the cmap */ 2383 for (j = 0; j < ncols; j++) { 2384 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2385 if (col > j && j < cstart) { 2386 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2387 break; 2388 } else if (col > j + n && j >= cstart) { 2389 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2390 break; 2391 } 2392 } 2393 if (j == ncols && ncols < A->cmap->N - n) { 2394 /* a hole is outside compressed Bcols */ 2395 if (ncols == 0) { 2396 if (cstart) { 2397 offdiagIdx[r] = 0; 2398 } else offdiagIdx[r] = cend; 2399 } else { /* ncols > 0 */ 2400 offdiagIdx[r] = cmap[ncols - 1] + 1; 2401 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2402 } 2403 } 2404 } 2405 2406 for (j = 0; j < ncols; j++) { 2407 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2408 offdiagA[r] = *ba; 2409 offdiagIdx[r] = cmap[*bj]; 2410 } 2411 ba++; 2412 bj++; 2413 } 2414 } 2415 2416 PetscCall(VecGetArrayWrite(v, &a)); 2417 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2418 for (r = 0; r < m; ++r) { 2419 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2420 a[r] = diagA[r]; 2421 if (idx) idx[r] = cstart + diagIdx[r]; 2422 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2423 a[r] = diagA[r]; 2424 if (idx) { 2425 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2426 idx[r] = cstart + diagIdx[r]; 2427 } else idx[r] = offdiagIdx[r]; 2428 } 2429 } else { 2430 a[r] = offdiagA[r]; 2431 if (idx) idx[r] = offdiagIdx[r]; 2432 } 2433 } 2434 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2435 PetscCall(VecRestoreArrayWrite(v, &a)); 2436 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2437 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2438 PetscCall(VecDestroy(&diagV)); 2439 PetscCall(VecDestroy(&offdiagV)); 2440 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2441 PetscFunctionReturn(PETSC_SUCCESS); 2442 } 2443 2444 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2445 { 2446 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2447 PetscInt m = A->rmap->n, n = A->cmap->n; 2448 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2449 PetscInt *cmap = mat->garray; 2450 PetscInt *diagIdx, *offdiagIdx; 2451 Vec diagV, offdiagV; 2452 PetscScalar *a, *diagA, *offdiagA; 2453 const PetscScalar *ba, *bav; 2454 PetscInt r, j, col, ncols, *bi, *bj; 2455 Mat B = mat->B; 2456 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2457 2458 PetscFunctionBegin; 2459 /* When a process holds entire A and other processes have no entry */ 2460 if (A->cmap->N == n) { 2461 PetscCall(VecGetArrayWrite(v, &diagA)); 2462 
PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2463 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2464 PetscCall(VecDestroy(&diagV)); 2465 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2466 PetscFunctionReturn(PETSC_SUCCESS); 2467 } else if (n == 0) { 2468 if (m) { 2469 PetscCall(VecGetArrayWrite(v, &a)); 2470 for (r = 0; r < m; r++) { 2471 a[r] = PETSC_MIN_REAL; 2472 if (idx) idx[r] = -1; 2473 } 2474 PetscCall(VecRestoreArrayWrite(v, &a)); 2475 } 2476 PetscFunctionReturn(PETSC_SUCCESS); 2477 } 2478 2479 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2480 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2481 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2482 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2483 2484 /* Get offdiagIdx[] for implicit 0.0 */ 2485 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2486 ba = bav; 2487 bi = b->i; 2488 bj = b->j; 2489 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2490 for (r = 0; r < m; r++) { 2491 ncols = bi[r + 1] - bi[r]; 2492 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2493 offdiagA[r] = *ba; 2494 offdiagIdx[r] = cmap[0]; 2495 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2496 offdiagA[r] = 0.0; 2497 2498 /* Find first hole in the cmap */ 2499 for (j = 0; j < ncols; j++) { 2500 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2501 if (col > j && j < cstart) { 2502 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2503 break; 2504 } else if (col > j + n && j >= cstart) { 2505 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2506 break; 2507 } 2508 } 2509 if (j == ncols && ncols < A->cmap->N - n) { 2510 /* a hole is outside compressed Bcols */ 2511 if (ncols == 0) { 2512 if (cstart) { 2513 offdiagIdx[r] = 0; 2514 } else offdiagIdx[r] = cend; 2515 } else { /* ncols > 0 */ 2516 offdiagIdx[r] = cmap[ncols - 1] + 1; 2517 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2518 } 2519 } 2520 } 2521 2522 for (j = 0; j < ncols; j++) { 2523 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2524 offdiagA[r] = *ba; 2525 offdiagIdx[r] = cmap[*bj]; 2526 } 2527 ba++; 2528 bj++; 2529 } 2530 } 2531 2532 PetscCall(VecGetArrayWrite(v, &a)); 2533 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2534 for (r = 0; r < m; ++r) { 2535 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2536 a[r] = diagA[r]; 2537 if (idx) idx[r] = cstart + diagIdx[r]; 2538 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2539 a[r] = diagA[r]; 2540 if (idx) { 2541 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2542 idx[r] = cstart + diagIdx[r]; 2543 } else idx[r] = offdiagIdx[r]; 2544 } 2545 } else { 2546 a[r] = offdiagA[r]; 2547 if (idx) idx[r] = offdiagIdx[r]; 2548 } 2549 } 2550 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2551 PetscCall(VecRestoreArrayWrite(v, &a)); 2552 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2553 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2554 PetscCall(VecDestroy(&diagV)); 2555 PetscCall(VecDestroy(&offdiagV)); 2556 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2557 PetscFunctionReturn(PETSC_SUCCESS); 2558 } 2559 2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2561 { 2562 Mat *dummy; 2563 2564 PetscFunctionBegin; 2565 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2566 *newmat = *dummy; 2567 PetscCall(PetscFree(dummy)); 2568 PetscFunctionReturn(PETSC_SUCCESS); 2569 } 2570 
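/*
   Note on the row-reduction routines above: an MPIAIJ matrix stores its locally owned
   rows as two sequential AIJ matrices, the "diagonal" block A (columns owned by this
   process) and the compressed "off-diagonal" block B, with garray[] translating B's
   local column numbers back to global column numbers. Each reduction is computed on A
   and B separately and the two results are merged per row, mapping B's winning index
   through garray.

   Hypothetical sketch: one process owning rows 0-1 and columns 0-1 of a 2 x 4 matrix

       global rows                 stored locally as
       [ 1 0 | 2 0 ]               A = [ 1 0 ; 0 3 ]            (global columns 0, 1)
       [ 0 3 | 0 4 ]               B = [ 2 0 ; 0 4 ],  garray = { 2, 3 }

   A caller working through the public interface would typically do something like the
   following (illustrative only; mat and mlocal are hypothetical names for an assembled
   MPIAIJ matrix and its local row count):

       Vec       rmax;
       PetscInt *cols;
       PetscCall(MatCreateVecs(mat, NULL, &rmax));
       PetscCall(PetscMalloc1(mlocal, &cols));
       PetscCall(MatGetRowMaxAbs(mat, rmax, cols));   -- per-row max |value| and its global column
       PetscCall(PetscFree(cols));
       PetscCall(VecDestroy(&rmax));
*/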
2571 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2572 { 2573 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2574 2575 PetscFunctionBegin; 2576 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2577 A->factorerrortype = a->A->factorerrortype; 2578 PetscFunctionReturn(PETSC_SUCCESS); 2579 } 2580 2581 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2582 { 2583 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2584 2585 PetscFunctionBegin; 2586 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2587 PetscCall(MatSetRandom(aij->A, rctx)); 2588 if (x->assembled) { 2589 PetscCall(MatSetRandom(aij->B, rctx)); 2590 } else { 2591 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2592 } 2593 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2594 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2595 PetscFunctionReturn(PETSC_SUCCESS); 2596 } 2597 2598 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2599 { 2600 PetscFunctionBegin; 2601 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2602 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2603 PetscFunctionReturn(PETSC_SUCCESS); 2604 } 2605 2606 /*@ 2607 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2608 2609 Not Collective 2610 2611 Input Parameter: 2612 . A - the matrix 2613 2614 Output Parameter: 2615 . nz - the number of nonzeros 2616 2617 Level: advanced 2618 2619 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2620 @*/ 2621 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2622 { 2623 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2624 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2625 PetscBool isaij; 2626 2627 PetscFunctionBegin; 2628 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2629 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2630 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2631 PetscFunctionReturn(PETSC_SUCCESS); 2632 } 2633 2634 /*@ 2635 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2636 2637 Collective 2638 2639 Input Parameters: 2640 + A - the matrix 2641 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2642 2643 Level: advanced 2644 2645 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2646 @*/ 2647 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2648 { 2649 PetscFunctionBegin; 2650 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2651 PetscFunctionReturn(PETSC_SUCCESS); 2652 } 2653 2654 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2655 { 2656 PetscBool sc = PETSC_FALSE, flg; 2657 2658 PetscFunctionBegin; 2659 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2660 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2661 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2662 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2663 PetscOptionsHeadEnd(); 2664 
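/* Example (hypothetical command line): running with
      -mat_increase_overlap_scalable 1
   flips MatIncreaseOverlap() for this matrix to the memory-scalable implementation
   installed by MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ() above. */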
PetscFunctionReturn(PETSC_SUCCESS); 2665 } 2666 2667 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2668 { 2669 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2670 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2671 2672 PetscFunctionBegin; 2673 if (!Y->preallocated) { 2674 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2675 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2676 PetscInt nonew = aij->nonew; 2677 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2678 aij->nonew = nonew; 2679 } 2680 PetscCall(MatShift_Basic(Y, a)); 2681 PetscFunctionReturn(PETSC_SUCCESS); 2682 } 2683 2684 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2685 { 2686 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2687 2688 PetscFunctionBegin; 2689 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2690 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2691 if (d) { 2692 PetscInt rstart; 2693 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2694 *d += rstart; 2695 } 2696 PetscFunctionReturn(PETSC_SUCCESS); 2697 } 2698 2699 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2700 { 2701 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2702 2703 PetscFunctionBegin; 2704 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2705 PetscFunctionReturn(PETSC_SUCCESS); 2706 } 2707 2708 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2709 { 2710 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2711 2712 PetscFunctionBegin; 2713 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2714 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2715 PetscFunctionReturn(PETSC_SUCCESS); 2716 } 2717 2718 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2719 MatGetRow_MPIAIJ, 2720 MatRestoreRow_MPIAIJ, 2721 MatMult_MPIAIJ, 2722 /* 4*/ MatMultAdd_MPIAIJ, 2723 MatMultTranspose_MPIAIJ, 2724 MatMultTransposeAdd_MPIAIJ, 2725 NULL, 2726 NULL, 2727 NULL, 2728 /*10*/ NULL, 2729 NULL, 2730 NULL, 2731 MatSOR_MPIAIJ, 2732 MatTranspose_MPIAIJ, 2733 /*15*/ MatGetInfo_MPIAIJ, 2734 MatEqual_MPIAIJ, 2735 MatGetDiagonal_MPIAIJ, 2736 MatDiagonalScale_MPIAIJ, 2737 MatNorm_MPIAIJ, 2738 /*20*/ MatAssemblyBegin_MPIAIJ, 2739 MatAssemblyEnd_MPIAIJ, 2740 MatSetOption_MPIAIJ, 2741 MatZeroEntries_MPIAIJ, 2742 /*24*/ MatZeroRows_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*29*/ MatSetUp_MPI_Hash, 2748 NULL, 2749 NULL, 2750 MatGetDiagonalBlock_MPIAIJ, 2751 NULL, 2752 /*34*/ MatDuplicate_MPIAIJ, 2753 NULL, 2754 NULL, 2755 NULL, 2756 NULL, 2757 /*39*/ MatAXPY_MPIAIJ, 2758 MatCreateSubMatrices_MPIAIJ, 2759 MatIncreaseOverlap_MPIAIJ, 2760 MatGetValues_MPIAIJ, 2761 MatCopy_MPIAIJ, 2762 /*44*/ MatGetRowMax_MPIAIJ, 2763 MatScale_MPIAIJ, 2764 MatShift_MPIAIJ, 2765 MatDiagonalSet_MPIAIJ, 2766 MatZeroRowsColumns_MPIAIJ, 2767 /*49*/ MatSetRandom_MPIAIJ, 2768 MatGetRowIJ_MPIAIJ, 2769 MatRestoreRowIJ_MPIAIJ, 2770 NULL, 2771 NULL, 2772 /*54*/ MatFDColoringCreate_MPIXAIJ, 2773 NULL, 2774 MatSetUnfactored_MPIAIJ, 2775 MatPermute_MPIAIJ, 2776 NULL, 2777 /*59*/ MatCreateSubMatrix_MPIAIJ, 2778 MatDestroy_MPIAIJ, 2779 MatView_MPIAIJ, 2780 NULL, 2781 NULL, 2782 /*64*/ NULL, 2783 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2784 NULL, 2785 NULL, 2786 NULL, 2787 /*69*/ 
MatGetRowMaxAbs_MPIAIJ, 2788 MatGetRowMinAbs_MPIAIJ, 2789 NULL, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*75*/ MatFDColoringApply_AIJ, 2794 MatSetFromOptions_MPIAIJ, 2795 NULL, 2796 NULL, 2797 MatFindZeroDiagonals_MPIAIJ, 2798 /*80*/ NULL, 2799 NULL, 2800 NULL, 2801 /*83*/ MatLoad_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 NULL, 2807 /*89*/ NULL, 2808 NULL, 2809 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2810 NULL, 2811 NULL, 2812 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2813 NULL, 2814 NULL, 2815 NULL, 2816 MatBindToCPU_MPIAIJ, 2817 /*99*/ MatProductSetFromOptions_MPIAIJ, 2818 NULL, 2819 NULL, 2820 MatConjugate_MPIAIJ, 2821 NULL, 2822 /*104*/ MatSetValuesRow_MPIAIJ, 2823 MatRealPart_MPIAIJ, 2824 MatImaginaryPart_MPIAIJ, 2825 NULL, 2826 NULL, 2827 /*109*/ NULL, 2828 NULL, 2829 MatGetRowMin_MPIAIJ, 2830 NULL, 2831 MatMissingDiagonal_MPIAIJ, 2832 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2833 NULL, 2834 MatGetGhosts_MPIAIJ, 2835 NULL, 2836 NULL, 2837 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2838 NULL, 2839 NULL, 2840 NULL, 2841 MatGetMultiProcBlock_MPIAIJ, 2842 /*124*/ MatFindNonzeroRows_MPIAIJ, 2843 MatGetColumnReductions_MPIAIJ, 2844 MatInvertBlockDiagonal_MPIAIJ, 2845 MatInvertVariableBlockDiagonal_MPIAIJ, 2846 MatCreateSubMatricesMPI_MPIAIJ, 2847 /*129*/ NULL, 2848 NULL, 2849 NULL, 2850 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2851 NULL, 2852 /*134*/ NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 NULL, 2857 /*139*/ MatSetBlockSizes_MPIAIJ, 2858 NULL, 2859 NULL, 2860 MatFDColoringSetUp_MPIXAIJ, 2861 MatFindOffBlockDiagonalEntries_MPIAIJ, 2862 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2863 /*145*/ NULL, 2864 NULL, 2865 NULL, 2866 MatCreateGraph_Simple_AIJ, 2867 NULL, 2868 /*150*/ NULL, 2869 MatEliminateZeros_MPIAIJ, 2870 MatGetRowSumAbs_MPIAIJ, 2871 NULL, 2872 NULL, 2873 NULL}; 2874 2875 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2876 { 2877 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2878 2879 PetscFunctionBegin; 2880 PetscCall(MatStoreValues(aij->A)); 2881 PetscCall(MatStoreValues(aij->B)); 2882 PetscFunctionReturn(PETSC_SUCCESS); 2883 } 2884 2885 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2886 { 2887 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2888 2889 PetscFunctionBegin; 2890 PetscCall(MatRetrieveValues(aij->A)); 2891 PetscCall(MatRetrieveValues(aij->B)); 2892 PetscFunctionReturn(PETSC_SUCCESS); 2893 } 2894 2895 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2896 { 2897 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2898 PetscMPIInt size; 2899 2900 PetscFunctionBegin; 2901 if (B->hash_active) { 2902 B->ops[0] = b->cops; 2903 B->hash_active = PETSC_FALSE; 2904 } 2905 PetscCall(PetscLayoutSetUp(B->rmap)); 2906 PetscCall(PetscLayoutSetUp(B->cmap)); 2907 2908 #if defined(PETSC_USE_CTABLE) 2909 PetscCall(PetscHMapIDestroy(&b->colmap)); 2910 #else 2911 PetscCall(PetscFree(b->colmap)); 2912 #endif 2913 PetscCall(PetscFree(b->garray)); 2914 PetscCall(VecDestroy(&b->lvec)); 2915 PetscCall(VecScatterDestroy(&b->Mvctx)); 2916 2917 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2918 2919 MatSeqXAIJGetOptions_Private(b->B); 2920 PetscCall(MatDestroy(&b->B)); 2921 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2922 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2923 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2924 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2925 MatSeqXAIJRestoreOptions_Private(b->B); 2926 2927 MatSeqXAIJGetOptions_Private(b->A); 2928 PetscCall(MatDestroy(&b->A)); 2929 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2930 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2931 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2932 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2933 MatSeqXAIJRestoreOptions_Private(b->A); 2934 2935 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2936 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2937 B->preallocated = PETSC_TRUE; 2938 B->was_assembled = PETSC_FALSE; 2939 B->assembled = PETSC_FALSE; 2940 PetscFunctionReturn(PETSC_SUCCESS); 2941 } 2942 2943 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2944 { 2945 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2946 2947 PetscFunctionBegin; 2948 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2949 PetscCall(PetscLayoutSetUp(B->rmap)); 2950 PetscCall(PetscLayoutSetUp(B->cmap)); 2951 if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2952 else { 2953 #if defined(PETSC_USE_CTABLE) 2954 PetscCall(PetscHMapIDestroy(&b->colmap)); 2955 #else 2956 PetscCall(PetscFree(b->colmap)); 2957 #endif 2958 PetscCall(PetscFree(b->garray)); 2959 PetscCall(VecDestroy(&b->lvec)); 2960 } 2961 PetscCall(VecScatterDestroy(&b->Mvctx)); 2962 2963 PetscCall(MatResetPreallocation(b->A)); 2964 PetscCall(MatResetPreallocation(b->B)); 2965 B->preallocated = PETSC_TRUE; 2966 B->was_assembled = PETSC_FALSE; 2967 B->assembled = PETSC_FALSE; 2968 PetscFunctionReturn(PETSC_SUCCESS); 2969 } 2970 2971 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2972 { 2973 Mat mat; 2974 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2975 2976 PetscFunctionBegin; 2977 *newmat = NULL; 2978 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2979 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2980 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2981 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2982 a = (Mat_MPIAIJ *)mat->data; 2983 2984 mat->factortype = matin->factortype; 2985 mat->assembled = matin->assembled; 2986 mat->insertmode = NOT_SET_VALUES; 2987 2988 a->size = oldmat->size; 2989 a->rank = oldmat->rank; 2990 a->donotstash = oldmat->donotstash; 2991 a->roworiented = oldmat->roworiented; 2992 a->rowindices = NULL; 2993 a->rowvalues = NULL; 2994 a->getrowactive = PETSC_FALSE; 2995 2996 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2997 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2998 if (matin->hash_active) { 2999 PetscCall(MatSetUp(mat)); 3000 } else { 3001 mat->preallocated = matin->preallocated; 3002 if (oldmat->colmap) { 3003 #if defined(PETSC_USE_CTABLE) 3004 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3005 #else 3006 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3007 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3008 #endif 3009 } else a->colmap = NULL; 3010 if (oldmat->garray) { 3011 PetscInt len; 3012 len = oldmat->B->cmap->n; 3013 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3014 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3015 } else a->garray = NULL; 3016 3017 /* It may happen MatDuplicate is called with a non-assembled matrix 3018 In fact, MatDuplicate only requires the matrix to 
be preallocated 3019 This may happen inside a DMCreateMatrix_Shell */ 3020 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3021 if (oldmat->Mvctx) { 3022 a->Mvctx = oldmat->Mvctx; 3023 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3024 } 3025 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3026 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3027 } 3028 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3029 *newmat = mat; 3030 PetscFunctionReturn(PETSC_SUCCESS); 3031 } 3032 3033 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3034 { 3035 PetscBool isbinary, ishdf5; 3036 3037 PetscFunctionBegin; 3038 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3039 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3040 /* force binary viewer to load .info file if it has not yet done so */ 3041 PetscCall(PetscViewerSetUp(viewer)); 3042 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3043 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3044 if (isbinary) { 3045 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3046 } else if (ishdf5) { 3047 #if defined(PETSC_HAVE_HDF5) 3048 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3049 #else 3050 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3051 #endif 3052 } else { 3053 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3054 } 3055 PetscFunctionReturn(PETSC_SUCCESS); 3056 } 3057 3058 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3059 { 3060 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3061 PetscInt *rowidxs, *colidxs; 3062 PetscScalar *matvals; 3063 3064 PetscFunctionBegin; 3065 PetscCall(PetscViewerSetUp(viewer)); 3066 3067 /* read in matrix header */ 3068 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3069 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3070 M = header[1]; 3071 N = header[2]; 3072 nz = header[3]; 3073 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3074 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3075 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3076 3077 /* set block sizes from the viewer's .info file */ 3078 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3079 /* set global sizes if not set already */ 3080 if (mat->rmap->N < 0) mat->rmap->N = M; 3081 if (mat->cmap->N < 0) mat->cmap->N = N; 3082 PetscCall(PetscLayoutSetUp(mat->rmap)); 3083 PetscCall(PetscLayoutSetUp(mat->cmap)); 3084 3085 /* check if the matrix sizes are correct */ 3086 PetscCall(MatGetSize(mat, &rows, &cols)); 3087 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3088 3089 /* read in row lengths and build row indices */ 3090 PetscCall(MatGetLocalSize(mat, 
&m, NULL)); 3091 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3092 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3093 rowidxs[0] = 0; 3094 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3095 if (nz != PETSC_INT_MAX) { 3096 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3097 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3098 } 3099 3100 /* read in column indices and matrix values */ 3101 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3102 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3103 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3104 /* store matrix indices and values */ 3105 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3106 PetscCall(PetscFree(rowidxs)); 3107 PetscCall(PetscFree2(colidxs, matvals)); 3108 PetscFunctionReturn(PETSC_SUCCESS); 3109 } 3110 3111 /* Not scalable because of ISAllGather() unless getting all columns. */ 3112 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3113 { 3114 IS iscol_local; 3115 PetscBool isstride; 3116 PetscMPIInt gisstride = 0; 3117 3118 PetscFunctionBegin; 3119 /* check if we are grabbing all columns*/ 3120 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3121 3122 if (isstride) { 3123 PetscInt start, len, mstart, mlen; 3124 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3125 PetscCall(ISGetLocalSize(iscol, &len)); 3126 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3127 if (mstart == start && mlen - mstart == len) gisstride = 1; 3128 } 3129 3130 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3131 if (gisstride) { 3132 PetscInt N; 3133 PetscCall(MatGetSize(mat, NULL, &N)); 3134 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3135 PetscCall(ISSetIdentity(iscol_local)); 3136 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3137 } else { 3138 PetscInt cbs; 3139 PetscCall(ISGetBlockSize(iscol, &cbs)); 3140 PetscCall(ISAllGather(iscol, &iscol_local)); 3141 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3142 } 3143 3144 *isseq = iscol_local; 3145 PetscFunctionReturn(PETSC_SUCCESS); 3146 } 3147 3148 /* 3149 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3150 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3151 3152 Input Parameters: 3153 + mat - matrix 3154 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3155 i.e., mat->rstart <= isrow[i] < mat->rend 3156 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3157 i.e., mat->cstart <= iscol[i] < mat->cend 3158 3159 Output Parameters: 3160 + isrow_d - sequential row index set for retrieving mat->A 3161 . iscol_d - sequential column index set for retrieving mat->A 3162 . 
iscol_o - sequential column index set for retrieving mat->B 3163 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3164 */ 3165 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3166 { 3167 Vec x, cmap; 3168 const PetscInt *is_idx; 3169 PetscScalar *xarray, *cmaparray; 3170 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3171 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3172 Mat B = a->B; 3173 Vec lvec = a->lvec, lcmap; 3174 PetscInt i, cstart, cend, Bn = B->cmap->N; 3175 MPI_Comm comm; 3176 VecScatter Mvctx = a->Mvctx; 3177 3178 PetscFunctionBegin; 3179 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3180 PetscCall(ISGetLocalSize(iscol, &ncols)); 3181 3182 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3183 PetscCall(MatCreateVecs(mat, &x, NULL)); 3184 PetscCall(VecSet(x, -1.0)); 3185 PetscCall(VecDuplicate(x, &cmap)); 3186 PetscCall(VecSet(cmap, -1.0)); 3187 3188 /* Get start indices */ 3189 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3190 isstart -= ncols; 3191 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3192 3193 PetscCall(ISGetIndices(iscol, &is_idx)); 3194 PetscCall(VecGetArray(x, &xarray)); 3195 PetscCall(VecGetArray(cmap, &cmaparray)); 3196 PetscCall(PetscMalloc1(ncols, &idx)); 3197 for (i = 0; i < ncols; i++) { 3198 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3199 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3200 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3201 } 3202 PetscCall(VecRestoreArray(x, &xarray)); 3203 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3204 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3205 3206 /* Get iscol_d */ 3207 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3208 PetscCall(ISGetBlockSize(iscol, &i)); 3209 PetscCall(ISSetBlockSize(*iscol_d, i)); 3210 3211 /* Get isrow_d */ 3212 PetscCall(ISGetLocalSize(isrow, &m)); 3213 rstart = mat->rmap->rstart; 3214 PetscCall(PetscMalloc1(m, &idx)); 3215 PetscCall(ISGetIndices(isrow, &is_idx)); 3216 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3217 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3218 3219 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3220 PetscCall(ISGetBlockSize(isrow, &i)); 3221 PetscCall(ISSetBlockSize(*isrow_d, i)); 3222 3223 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3224 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3225 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3226 3227 PetscCall(VecDuplicate(lvec, &lcmap)); 3228 3229 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3230 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3231 3232 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3233 /* off-process column indices */ 3234 count = 0; 3235 PetscCall(PetscMalloc1(Bn, &idx)); 3236 PetscCall(PetscMalloc1(Bn, &cmap1)); 3237 3238 PetscCall(VecGetArray(lvec, &xarray)); 3239 PetscCall(VecGetArray(lcmap, &cmaparray)); 3240 for (i = 0; i < Bn; i++) { 3241 if (PetscRealPart(xarray[i]) > -1.0) { 3242 idx[count] = i; /* local column index in off-diagonal part B */ 3243 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3244 
count++; 3245 } 3246 } 3247 PetscCall(VecRestoreArray(lvec, &xarray)); 3248 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3249 3250 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3251 /* cannot ensure iscol_o has same blocksize as iscol! */ 3252 3253 PetscCall(PetscFree(idx)); 3254 *garray = cmap1; 3255 3256 PetscCall(VecDestroy(&x)); 3257 PetscCall(VecDestroy(&cmap)); 3258 PetscCall(VecDestroy(&lcmap)); 3259 PetscFunctionReturn(PETSC_SUCCESS); 3260 } 3261 3262 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3263 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3264 { 3265 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3266 Mat M = NULL; 3267 MPI_Comm comm; 3268 IS iscol_d, isrow_d, iscol_o; 3269 Mat Asub = NULL, Bsub = NULL; 3270 PetscInt n; 3271 3272 PetscFunctionBegin; 3273 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3274 3275 if (call == MAT_REUSE_MATRIX) { 3276 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3277 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3278 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3279 3280 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3281 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3282 3283 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3284 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3285 3286 /* Update diagonal and off-diagonal portions of submat */ 3287 asub = (Mat_MPIAIJ *)(*submat)->data; 3288 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3289 PetscCall(ISGetLocalSize(iscol_o, &n)); 3290 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3291 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3292 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3293 3294 } else { /* call == MAT_INITIAL_MATRIX) */ 3295 const PetscInt *garray; 3296 PetscInt BsubN; 3297 3298 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3299 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3300 3301 /* Create local submatrices Asub and Bsub */ 3302 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3303 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3304 3305 /* Create submatrix M */ 3306 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3307 3308 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3309 asub = (Mat_MPIAIJ *)M->data; 3310 3311 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3312 n = asub->B->cmap->N; 3313 if (BsubN > n) { 3314 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3315 const PetscInt *idx; 3316 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3317 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3318 3319 PetscCall(PetscMalloc1(n, &idx_new)); 3320 j = 0; 3321 PetscCall(ISGetIndices(iscol_o, &idx)); 3322 for (i = 0; i < n; i++) { 3323 if (j >= BsubN) break; 3324 while (subgarray[i] > garray[j]) j++; 3325 3326 if (subgarray[i] == garray[j]) { 3327 idx_new[i] = idx[j++]; 3328 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3329 } 3330 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3331 3332 PetscCall(ISDestroy(&iscol_o)); 3333 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3334 3335 } else if (BsubN < n) { 3336 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3337 } 3338 3339 PetscCall(PetscFree(garray)); 3340 *submat = M; 3341 3342 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3343 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3344 PetscCall(ISDestroy(&isrow_d)); 3345 3346 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3347 PetscCall(ISDestroy(&iscol_d)); 3348 3349 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3350 PetscCall(ISDestroy(&iscol_o)); 3351 } 3352 PetscFunctionReturn(PETSC_SUCCESS); 3353 } 3354 3355 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3356 { 3357 IS iscol_local = NULL, isrow_d; 3358 PetscInt csize; 3359 PetscInt n, i, j, start, end; 3360 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3361 MPI_Comm comm; 3362 3363 PetscFunctionBegin; 3364 /* If isrow has same processor distribution as mat, 3365 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3366 if (call == MAT_REUSE_MATRIX) { 3367 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3368 if (isrow_d) { 3369 sameRowDist = PETSC_TRUE; 3370 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3371 } else { 3372 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3373 if (iscol_local) { 3374 sameRowDist = PETSC_TRUE; 3375 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3376 } 3377 } 3378 } else { 3379 /* Check if isrow has same processor distribution as mat */ 3380 sameDist[0] = PETSC_FALSE; 3381 
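    /* isrow has the same distribution as mat's rows when every locally owned index of isrow
       falls inside this rank's ownership range [start, end); an empty local piece trivially qualifies */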
PetscCall(ISGetLocalSize(isrow, &n)); 3382 if (!n) { 3383 sameDist[0] = PETSC_TRUE; 3384 } else { 3385 PetscCall(ISGetMinMax(isrow, &i, &j)); 3386 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3387 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3388 } 3389 3390 /* Check if iscol has same processor distribution as mat */ 3391 sameDist[1] = PETSC_FALSE; 3392 PetscCall(ISGetLocalSize(iscol, &n)); 3393 if (!n) { 3394 sameDist[1] = PETSC_TRUE; 3395 } else { 3396 PetscCall(ISGetMinMax(iscol, &i, &j)); 3397 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3398 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3399 } 3400 3401 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3402 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3403 sameRowDist = tsameDist[0]; 3404 } 3405 3406 if (sameRowDist) { 3407 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3408 /* isrow and iscol have same processor distribution as mat */ 3409 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3410 PetscFunctionReturn(PETSC_SUCCESS); 3411 } else { /* sameRowDist */ 3412 /* isrow has same processor distribution as mat */ 3413 if (call == MAT_INITIAL_MATRIX) { 3414 PetscBool sorted; 3415 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3416 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3417 PetscCall(ISGetSize(iscol, &i)); 3418 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3419 3420 PetscCall(ISSorted(iscol_local, &sorted)); 3421 if (sorted) { 3422 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3423 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 } else { /* call == MAT_REUSE_MATRIX */ 3427 IS iscol_sub; 3428 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3429 if (iscol_sub) { 3430 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3431 PetscFunctionReturn(PETSC_SUCCESS); 3432 } 3433 } 3434 } 3435 } 3436 3437 /* General case: iscol -> iscol_local which has global size of iscol */ 3438 if (call == MAT_REUSE_MATRIX) { 3439 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3440 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3441 } else { 3442 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3443 } 3444 3445 PetscCall(ISGetLocalSize(iscol, &csize)); 3446 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3447 3448 if (call == MAT_INITIAL_MATRIX) { 3449 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3450 PetscCall(ISDestroy(&iscol_local)); 3451 } 3452 PetscFunctionReturn(PETSC_SUCCESS); 3453 } 3454 3455 /*@C 3456 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3457 and "off-diagonal" part of the matrix in CSR format. 3458 3459 Collective 3460 3461 Input Parameters: 3462 + comm - MPI communicator 3463 . A - "diagonal" portion of matrix 3464 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3465 - garray - global index of `B` columns 3466 3467 Output Parameter: 3468 . mat - the matrix, with input `A` as its local diagonal matrix 3469 3470 Level: advanced 3471 3472 Notes: 3473 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3474 3475 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3476 3477 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3478 @*/ 3479 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3480 { 3481 Mat_MPIAIJ *maij; 3482 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3483 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3484 const PetscScalar *oa; 3485 Mat Bnew; 3486 PetscInt m, n, N; 3487 MatType mpi_mat_type; 3488 3489 PetscFunctionBegin; 3490 PetscCall(MatCreate(comm, mat)); 3491 PetscCall(MatGetSize(A, &m, &n)); 3492 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3493 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3494 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3495 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3496 3497 /* Get global columns of mat */ 3498 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3499 3500 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3501 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
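     (for example, a `MATSEQAIJ` A yields a `MATMPIAIJ` *mat, so that device-specific sequential types can map to their parallel counterparts)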
*/ 3502 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3503 PetscCall(MatSetType(*mat, mpi_mat_type)); 3504 3505 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3506 maij = (Mat_MPIAIJ *)(*mat)->data; 3507 3508 (*mat)->preallocated = PETSC_TRUE; 3509 3510 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3511 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3512 3513 /* Set A as diagonal portion of *mat */ 3514 maij->A = A; 3515 3516 nz = oi[m]; 3517 for (i = 0; i < nz; i++) { 3518 col = oj[i]; 3519 oj[i] = garray[col]; 3520 } 3521 3522 /* Set Bnew as off-diagonal portion of *mat */ 3523 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3524 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3525 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3526 bnew = (Mat_SeqAIJ *)Bnew->data; 3527 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3528 maij->B = Bnew; 3529 3530 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3531 3532 b->free_a = PETSC_FALSE; 3533 b->free_ij = PETSC_FALSE; 3534 PetscCall(MatDestroy(&B)); 3535 3536 bnew->free_a = PETSC_TRUE; 3537 bnew->free_ij = PETSC_TRUE; 3538 3539 /* condense columns of maij->B */ 3540 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3541 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3542 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3543 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3544 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3545 PetscFunctionReturn(PETSC_SUCCESS); 3546 } 3547 3548 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3549 3550 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3551 { 3552 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3553 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3554 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3555 Mat M, Msub, B = a->B; 3556 MatScalar *aa; 3557 Mat_SeqAIJ *aij; 3558 PetscInt *garray = a->garray, *colsub, Ncols; 3559 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3560 IS iscol_sub, iscmap; 3561 const PetscInt *is_idx, *cmap; 3562 PetscBool allcolumns = PETSC_FALSE; 3563 MPI_Comm comm; 3564 3565 PetscFunctionBegin; 3566 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3567 if (call == MAT_REUSE_MATRIX) { 3568 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3569 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3570 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3571 3572 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3573 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3574 3575 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3576 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3577 3578 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3579 3580 } else { /* call == MAT_INITIAL_MATRIX) */ 3581 PetscBool 
flg; 3582 3583 PetscCall(ISGetLocalSize(iscol, &n)); 3584 PetscCall(ISGetSize(iscol, &Ncols)); 3585 3586 /* (1) iscol -> nonscalable iscol_local */ 3587 /* Check for special case: each processor gets entire matrix columns */ 3588 PetscCall(ISIdentity(iscol_local, &flg)); 3589 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3590 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3591 if (allcolumns) { 3592 iscol_sub = iscol_local; 3593 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3594 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3595 3596 } else { 3597 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3598 PetscInt *idx, *cmap1, k; 3599 PetscCall(PetscMalloc1(Ncols, &idx)); 3600 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3601 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3602 count = 0; 3603 k = 0; 3604 for (i = 0; i < Ncols; i++) { 3605 j = is_idx[i]; 3606 if (j >= cstart && j < cend) { 3607 /* diagonal part of mat */ 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } else if (Bn) { 3611 /* off-diagonal part of mat */ 3612 if (j == garray[k]) { 3613 idx[count] = j; 3614 cmap1[count++] = i; /* column index in submat */ 3615 } else if (j > garray[k]) { 3616 while (j > garray[k] && k < Bn - 1) k++; 3617 if (j == garray[k]) { 3618 idx[count] = j; 3619 cmap1[count++] = i; /* column index in submat */ 3620 } 3621 } 3622 } 3623 } 3624 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3625 3626 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3627 PetscCall(ISGetBlockSize(iscol, &cbs)); 3628 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3629 3630 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3631 } 3632 3633 /* (3) Create sequential Msub */ 3634 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3635 } 3636 3637 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3638 aij = (Mat_SeqAIJ *)Msub->data; 3639 ii = aij->i; 3640 PetscCall(ISGetIndices(iscmap, &cmap)); 3641 3642 /* 3643 m - number of local rows 3644 Ncols - number of columns (same on all processors) 3645 rstart - first row in new global matrix generated 3646 */ 3647 PetscCall(MatGetSize(Msub, &m, NULL)); 3648 3649 if (call == MAT_INITIAL_MATRIX) { 3650 /* (4) Create parallel newmat */ 3651 PetscMPIInt rank, size; 3652 PetscInt csize; 3653 3654 PetscCallMPI(MPI_Comm_size(comm, &size)); 3655 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3656 3657 /* 3658 Determine the number of non-zeros in the diagonal and off-diagonal 3659 portions of the matrix in order to do correct preallocation 3660 */ 3661 3662 /* first get start and end of "diagonal" columns */ 3663 PetscCall(ISGetLocalSize(iscol, &csize)); 3664 if (csize == PETSC_DECIDE) { 3665 PetscCall(ISGetSize(isrow, &mglobal)); 3666 if (mglobal == Ncols) { /* square matrix */ 3667 nlocal = m; 3668 } else { 3669 nlocal = Ncols / size + ((Ncols % size) > rank); 3670 } 3671 } else { 3672 nlocal = csize; 3673 } 3674 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3675 rstart = rend - nlocal; 3676 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3677 3678 /* next, 
compute all the lengths */ 3679 jj = aij->j; 3680 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3681 olens = dlens + m; 3682 for (i = 0; i < m; i++) { 3683 jend = ii[i + 1] - ii[i]; 3684 olen = 0; 3685 dlen = 0; 3686 for (j = 0; j < jend; j++) { 3687 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3688 else dlen++; 3689 jj++; 3690 } 3691 olens[i] = olen; 3692 dlens[i] = dlen; 3693 } 3694 3695 PetscCall(ISGetBlockSize(isrow, &bs)); 3696 PetscCall(ISGetBlockSize(iscol, &cbs)); 3697 3698 PetscCall(MatCreate(comm, &M)); 3699 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3700 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3701 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3702 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3703 PetscCall(PetscFree(dlens)); 3704 3705 } else { /* call == MAT_REUSE_MATRIX */ 3706 M = *newmat; 3707 PetscCall(MatGetLocalSize(M, &i, NULL)); 3708 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3709 PetscCall(MatZeroEntries(M)); 3710 /* 3711 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3712 rather than the slower MatSetValues(). 3713 */ 3714 M->was_assembled = PETSC_TRUE; 3715 M->assembled = PETSC_FALSE; 3716 } 3717 3718 /* (5) Set values of Msub to *newmat */ 3719 PetscCall(PetscMalloc1(count, &colsub)); 3720 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3721 3722 jj = aij->j; 3723 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3724 for (i = 0; i < m; i++) { 3725 row = rstart + i; 3726 nz = ii[i + 1] - ii[i]; 3727 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3728 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3729 jj += nz; 3730 aa += nz; 3731 } 3732 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3733 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3734 3735 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3736 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3737 3738 PetscCall(PetscFree(colsub)); 3739 3740 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3741 if (call == MAT_INITIAL_MATRIX) { 3742 *newmat = M; 3743 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3744 PetscCall(MatDestroy(&Msub)); 3745 3746 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3747 PetscCall(ISDestroy(&iscol_sub)); 3748 3749 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3750 PetscCall(ISDestroy(&iscmap)); 3751 3752 if (iscol_local) { 3753 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3754 PetscCall(ISDestroy(&iscol_local)); 3755 } 3756 } 3757 PetscFunctionReturn(PETSC_SUCCESS); 3758 } 3759 3760 /* 3761 Not great since it makes two copies of the submatrix, first an SeqAIJ 3762 in local and then by concatenating the local matrices the end result. 3763 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3764 3765 This requires a sequential iscol with all indices. 
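     (Each rank first extracts its piece into a sequential matrix with MatCreateSubMatrices_MPIAIJ_SingleIS_Local(),
     then the values are re-inserted row by row into the parallel result via MatSetValues_MPIAIJ().)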
3766 */ 3767 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3768 { 3769 PetscMPIInt rank, size; 3770 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3771 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3772 Mat M, Mreuse; 3773 MatScalar *aa, *vwork; 3774 MPI_Comm comm; 3775 Mat_SeqAIJ *aij; 3776 PetscBool colflag, allcolumns = PETSC_FALSE; 3777 3778 PetscFunctionBegin; 3779 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3780 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3781 PetscCallMPI(MPI_Comm_size(comm, &size)); 3782 3783 /* Check for special case: each processor gets entire matrix columns */ 3784 PetscCall(ISIdentity(iscol, &colflag)); 3785 PetscCall(ISGetLocalSize(iscol, &n)); 3786 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3787 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3788 3789 if (call == MAT_REUSE_MATRIX) { 3790 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3791 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3792 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3793 } else { 3794 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3795 } 3796 3797 /* 3798 m - number of local rows 3799 n - number of columns (same on all processors) 3800 rstart - first row in new global matrix generated 3801 */ 3802 PetscCall(MatGetSize(Mreuse, &m, &n)); 3803 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3804 if (call == MAT_INITIAL_MATRIX) { 3805 aij = (Mat_SeqAIJ *)Mreuse->data; 3806 ii = aij->i; 3807 jj = aij->j; 3808 3809 /* 3810 Determine the number of non-zeros in the diagonal and off-diagonal 3811 portions of the matrix in order to do correct preallocation 3812 */ 3813 3814 /* first get start and end of "diagonal" columns */ 3815 if (csize == PETSC_DECIDE) { 3816 PetscCall(ISGetSize(isrow, &mglobal)); 3817 if (mglobal == n) { /* square matrix */ 3818 nlocal = m; 3819 } else { 3820 nlocal = n / size + ((n % size) > rank); 3821 } 3822 } else { 3823 nlocal = csize; 3824 } 3825 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3826 rstart = rend - nlocal; 3827 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3828 3829 /* next, compute all the lengths */ 3830 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3831 olens = dlens + m; 3832 for (i = 0; i < m; i++) { 3833 jend = ii[i + 1] - ii[i]; 3834 olen = 0; 3835 dlen = 0; 3836 for (j = 0; j < jend; j++) { 3837 if (*jj < rstart || *jj >= rend) olen++; 3838 else dlen++; 3839 jj++; 3840 } 3841 olens[i] = olen; 3842 dlens[i] = dlen; 3843 } 3844 PetscCall(MatCreate(comm, &M)); 3845 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3846 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3847 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3848 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3849 PetscCall(PetscFree(dlens)); 3850 } else { 3851 PetscInt ml, nl; 3852 3853 M = *newmat; 3854 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3855 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3856 PetscCall(MatZeroEntries(M)); 3857 /* 3858 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3859 rather than the slower MatSetValues(). 3860 */ 3861 M->was_assembled = PETSC_TRUE; 3862 M->assembled = PETSC_FALSE; 3863 } 3864 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3865 aij = (Mat_SeqAIJ *)Mreuse->data; 3866 ii = aij->i; 3867 jj = aij->j; 3868 3869 /* trigger copy to CPU if needed */ 3870 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3871 for (i = 0; i < m; i++) { 3872 row = rstart + i; 3873 nz = ii[i + 1] - ii[i]; 3874 cwork = jj; 3875 jj = PetscSafePointerPlusOffset(jj, nz); 3876 vwork = aa; 3877 aa = PetscSafePointerPlusOffset(aa, nz); 3878 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3879 } 3880 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3881 3882 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3883 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3884 *newmat = M; 3885 3886 /* save submatrix used in processor for next request */ 3887 if (call == MAT_INITIAL_MATRIX) { 3888 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3889 PetscCall(MatDestroy(&Mreuse)); 3890 } 3891 PetscFunctionReturn(PETSC_SUCCESS); 3892 } 3893 3894 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3895 { 3896 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3897 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3898 const PetscInt *JJ; 3899 PetscBool nooffprocentries; 3900 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3901 3902 PetscFunctionBegin; 3903 PetscCall(PetscLayoutSetUp(B->rmap)); 3904 PetscCall(PetscLayoutSetUp(B->cmap)); 3905 m = B->rmap->n; 3906 cstart = B->cmap->rstart; 3907 cend = B->cmap->rend; 3908 rstart = B->rmap->rstart; 3909 irstart = Ii[0]; 3910 3911 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3912 3913 if (PetscDefined(USE_DEBUG)) { 3914 for (i = 0; i < m; i++) { 3915 nnz = Ii[i + 1] - Ii[i]; 3916 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3917 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3918 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3919 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3920 } 3921 } 3922 3923 for (i = 0; i < m; i++) { 3924 nnz = Ii[i + 1] - Ii[i]; 3925 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3926 nnz_max = PetscMax(nnz_max, nnz); 3927 d = 0; 3928 for (j = 0; j < nnz; j++) { 3929 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3930 } 3931 d_nnz[i] = d; 3932 o_nnz[i] = nnz - d; 3933 } 3934 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3935 PetscCall(PetscFree2(d_nnz, o_nnz)); 3936 3937 for (i = 0; i < m; i++) { 3938 ii = i + rstart; 3939 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3940 } 3941 nooffprocentries = B->nooffprocentries; 3942 B->nooffprocentries = PETSC_TRUE; 3943 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3944 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3945 B->nooffprocentries = nooffprocentries; 3946 3947 /* count number of entries below block diagonal */ 3948 PetscCall(PetscFree(Aij->ld)); 3949 PetscCall(PetscCalloc1(m, &ld)); 3950 Aij->ld = ld; 3951 for (i = 0; i < m; i++) { 3952 nnz = Ii[i + 1] - Ii[i]; 3953 j = 0; 3954 while (j < nnz && J[j] < cstart) j++; 3955 ld[i] = j; 3956 if (J) J += nnz; 3957 } 3958 3959 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3960 PetscFunctionReturn(PETSC_SUCCESS); 3961 } 3962 3963 /*@ 3964 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3965 (the default parallel PETSc format). 3966 3967 Collective 3968 3969 Input Parameters: 3970 + B - the matrix 3971 . i - the indices into `j` for the start of each local row (indices start with zero) 3972 . j - the column indices for each local row (indices start with zero) 3973 - v - optional values in the matrix 3974 3975 Level: developer 3976 3977 Notes: 3978 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3979 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3980 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3981 3982 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3983 3984 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3985 3986 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3987 3988 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3989 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3990 3991 The format which is used for the sparse matrix input, is equivalent to a 3992 row-major ordering.. i.e for the following matrix, the input data expected is 3993 as shown 3994 .vb 3995 1 0 0 3996 2 0 3 P0 3997 ------- 3998 4 5 6 P1 3999 4000 Process0 [P0] rows_owned=[0,1] 4001 i = {0,1,3} [size = nrow+1 = 2+1] 4002 j = {0,0,2} [size = 3] 4003 v = {1,2,3} [size = 3] 4004 4005 Process1 [P1] rows_owned=[2] 4006 i = {0,3} [size = nrow+1 = 1+1] 4007 j = {0,1,2} [size = 3] 4008 v = {4,5,6} [size = 3] 4009 .ve 4010 4011 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4012 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4013 @*/ 4014 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4015 { 4016 PetscFunctionBegin; 4017 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4018 PetscFunctionReturn(PETSC_SUCCESS); 4019 } 4020 4021 /*@ 4022 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4023 (the default parallel PETSc format). For good matrix assembly performance 4024 the user should preallocate the matrix storage by setting the parameters 4025 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4026 4027 Collective 4028 4029 Input Parameters: 4030 + B - the matrix 4031 . 
d_nz  - number of nonzeros per row in the DIAGONAL portion of the local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e., 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of the local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e., 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all of the above values, i.e., 34, and
  hence the preallocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored.

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an m x n matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (m x N) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
.
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4171 . j - global column indices 4172 - a - optional matrix values 4173 4174 Output Parameter: 4175 . mat - the matrix 4176 4177 Level: intermediate 4178 4179 Notes: 4180 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4181 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4182 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4183 4184 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4185 4186 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4187 4188 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4189 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4190 4191 The format which is used for the sparse matrix input, is equivalent to a 4192 row-major ordering, i.e., for the following matrix, the input data expected is 4193 as shown 4194 .vb 4195 1 0 0 4196 2 0 3 P0 4197 ------- 4198 4 5 6 P1 4199 4200 Process0 [P0] rows_owned=[0,1] 4201 i = {0,1,3} [size = nrow+1 = 2+1] 4202 j = {0,0,2} [size = 3] 4203 v = {1,2,3} [size = 3] 4204 4205 Process1 [P1] rows_owned=[2] 4206 i = {0,3} [size = nrow+1 = 1+1] 4207 j = {0,1,2} [size = 3] 4208 v = {4,5,6} [size = 3] 4209 .ve 4210 4211 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4212 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4213 @*/ 4214 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4215 { 4216 PetscFunctionBegin; 4217 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4218 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4219 PetscCall(MatCreate(comm, mat)); 4220 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4221 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4222 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4223 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4224 PetscFunctionReturn(PETSC_SUCCESS); 4225 } 4226 4227 /*@ 4228 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4229 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4230 from `MatCreateMPIAIJWithArrays()` 4231 4232 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4233 4234 Collective 4235 4236 Input Parameters: 4237 + mat - the matrix 4238 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4239 . n - This value should be the same as the local size used in creating the 4240 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4241 calculated if N is given) For square matrices n is almost always m. 4242 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4243 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4244 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4245 . J - column indices 4246 - v - matrix values 4247 4248 Level: deprecated 4249 4250 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4251 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4252 @*/ 4253 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4254 { 4255 PetscInt nnz, i; 4256 PetscBool nooffprocentries; 4257 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4258 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4259 PetscScalar *ad, *ao; 4260 PetscInt ldi, Iii, md; 4261 const PetscInt *Adi = Ad->i; 4262 PetscInt *ld = Aij->ld; 4263 4264 PetscFunctionBegin; 4265 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4266 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4267 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4268 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4269 4270 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4271 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4272 4273 for (i = 0; i < m; i++) { 4274 if (PetscDefined(USE_DEBUG)) { 4275 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4276 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4277 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4278 } 4279 } 4280 nnz = Ii[i + 1] - Ii[i]; 4281 Iii = Ii[i]; 4282 ldi = ld[i]; 4283 md = Adi[i + 1] - Adi[i]; 4284 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4285 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4286 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4287 ad += md; 4288 ao += nnz - md; 4289 } 4290 nooffprocentries = mat->nooffprocentries; 4291 mat->nooffprocentries = PETSC_TRUE; 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4293 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4297 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4298 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4299 mat->nooffprocentries = nooffprocentries; 4300 PetscFunctionReturn(PETSC_SUCCESS); 4301 } 4302 4303 /*@ 4304 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4305 4306 Collective 4307 4308 Input Parameters: 4309 + mat - the matrix 4310 - v - matrix values, stored by row 4311 4312 Level: intermediate 4313 4314 Notes: 4315 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4316 4317 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4318 4319 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4320 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4321 @*/ 4322 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4323 { 4324 PetscInt nnz, i, m; 4325 PetscBool nooffprocentries; 4326 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4327 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4328 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4329 PetscScalar *ad, *ao; 4330 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4331 PetscInt ldi, Iii, md; 4332 PetscInt *ld = Aij->ld; 4333 4334 PetscFunctionBegin; 4335 m = mat->rmap->n; 4336 4337 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4338 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4339 Iii = 0; 4340 for (i = 0; i < m; i++) { 4341 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4342 ldi = ld[i]; 4343 md = Adi[i + 1] - Adi[i]; 4344 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4345 ad += md; 4346 if (ao) { 4347 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4348 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4349 ao += nnz - md; 4350 } 4351 Iii += nnz; 4352 } 4353 nooffprocentries = mat->nooffprocentries; 4354 mat->nooffprocentries = PETSC_TRUE; 4355 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4356 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4357 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4358 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4359 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4360 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4361 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4362 mat->nooffprocentries = nooffprocentries; 4363 PetscFunctionReturn(PETSC_SUCCESS); 4364 } 4365 4366 /*@ 4367 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4368 (the default parallel PETSc format). For good matrix assembly performance 4369 the user should preallocate the matrix storage by setting the parameters 4370 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4371 4372 Collective 4373 4374 Input Parameters: 4375 + comm - MPI communicator 4376 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4377 This value should be the same as the local size used in creating the 4378 y vector for the matrix-vector product y = Ax. 4379 . n - This value should be the same as the local size used in creating the 4380 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4381 calculated if N is given) For square matrices n is almost always m. 4382 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4383 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4384 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4385 (same value is used for all local rows) 4386 . d_nnz - array containing the number of nonzeros in the various rows of the 4387 DIAGONAL portion of the local submatrix (possibly different for each row) 4388 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4389 The size of this array is equal to the number of local rows, i.e 'm'. 4390 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4391 submatrix (same value is used for all local rows). 4392 - o_nnz - array containing the number of nonzeros in the various rows of the 4393 OFF-DIAGONAL portion of the local submatrix (possibly different for 4394 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4395 structure. The size of this array is equal to the number 4396 of local rows, i.e 'm'. 4397 4398 Output Parameter: 4399 . A - the matrix 4400 4401 Options Database Keys: 4402 + -mat_no_inode - Do not use inodes 4403 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4404 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4405 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4406 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4407 4408 Level: intermediate 4409 4410 Notes: 4411 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4412 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4413 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4414 4415 If the *_nnz parameter is given then the *_nz parameter is ignored 4416 4417 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4418 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4419 storage requirements for this matrix. 4420 4421 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4422 processor than it must be used on all processors that share the object for 4423 that argument. 4424 4425 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4426 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4427 4428 The user MUST specify either the local or global matrix dimensions 4429 (possibly both). 4430 4431 The parallel matrix is partitioned across processors such that the 4432 first `m0` rows belong to process 0, the next `m1` rows belong to 4433 process 1, the next `m2` rows belong to process 2, etc., where 4434 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4435 values corresponding to [m x N] submatrix. 4436 4437 The columns are logically partitioned with the n0 columns belonging 4438 to 0th partition, the next n1 columns belonging to the next 4439 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4440 4441 The DIAGONAL portion of the local submatrix on any given processor 4442 is the submatrix corresponding to the rows and columns m,n 4443 corresponding to the given processor. i.e diagonal matrix on 4444 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4445 etc. The remaining portion of the local submatrix [m x (N-n)] 4446 constitute the OFF-DIAGONAL portion. The example below better 4447 illustrates this concept. The two matrices, the DIAGONAL portion and 4448 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 
4449 4450 For a square global matrix we define each processor's diagonal portion 4451 to be its local rows and the corresponding columns (a square submatrix); 4452 each processor's off-diagonal portion encompasses the remainder of the 4453 local matrix (a rectangular submatrix). 4454 4455 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4456 4457 When calling this routine with a single process communicator, a matrix of 4458 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4459 type of communicator, use the construction mechanism 4460 .vb 4461 MatCreate(..., &A); 4462 MatSetType(A, MATMPIAIJ); 4463 MatSetSizes(A, m, n, M, N); 4464 MatMPIAIJSetPreallocation(A, ...); 4465 .ve 4466 4467 By default, this format uses inodes (identical nodes) when possible. 4468 We search for consecutive rows with the same nonzero structure, thereby 4469 reusing matrix information to achieve increased efficiency. 4470 4471 Example Usage: 4472 Consider the following 8x8 matrix with 34 non-zero values, that is 4473 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4474 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4475 as follows 4476 4477 .vb 4478 1 2 0 | 0 3 0 | 0 4 4479 Proc0 0 5 6 | 7 0 0 | 8 0 4480 9 0 10 | 11 0 0 | 12 0 4481 ------------------------------------- 4482 13 0 14 | 15 16 17 | 0 0 4483 Proc1 0 18 0 | 19 20 21 | 0 0 4484 0 0 0 | 22 23 0 | 24 0 4485 ------------------------------------- 4486 Proc2 25 26 27 | 0 0 28 | 29 0 4487 30 0 0 | 31 32 33 | 0 34 4488 .ve 4489 4490 This can be represented as a collection of submatrices as 4491 4492 .vb 4493 A B C 4494 D E F 4495 G H I 4496 .ve 4497 4498 Where the submatrices A,B,C are owned by proc0, D,E,F are 4499 owned by proc1, G,H,I are owned by proc2. 4500 4501 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4502 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4503 The 'M','N' parameters are 8,8, and have the same values on all procs. 4504 4505 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4506 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4507 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4508 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4509 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4510 matrix, and [DF] as another SeqAIJ matrix. 4511 4512 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4513 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4514 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4515 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4516 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4517 In this case, the values of `d_nz`,`o_nz` are 4518 .vb 4519 proc0 d_nz = 2, o_nz = 2 4520 proc1 d_nz = 3, o_nz = 2 4521 proc2 d_nz = 1, o_nz = 4 4522 .ve 4523 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4524 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4525 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4526 34 values. 4527 4528 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4529 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4530 In the above case the values for d_nnz,o_nnz are 4531 .vb 4532 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4533 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4534 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4535 .ve 4536 Here the space allocated is sum of all the above values i.e 34, and 4537 hence pre-allocation is perfect. 4538 4539 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4540 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4541 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4542 @*/ 4543 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4544 { 4545 PetscMPIInt size; 4546 4547 PetscFunctionBegin; 4548 PetscCall(MatCreate(comm, A)); 4549 PetscCall(MatSetSizes(*A, m, n, M, N)); 4550 PetscCallMPI(MPI_Comm_size(comm, &size)); 4551 if (size > 1) { 4552 PetscCall(MatSetType(*A, MATMPIAIJ)); 4553 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4554 } else { 4555 PetscCall(MatSetType(*A, MATSEQAIJ)); 4556 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4557 } 4558 PetscFunctionReturn(PETSC_SUCCESS); 4559 } 4560 4561 /*MC 4562 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4563 4564 Synopsis: 4565 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4566 4567 Not Collective 4568 4569 Input Parameter: 4570 . A - the `MATMPIAIJ` matrix 4571 4572 Output Parameters: 4573 + Ad - the diagonal portion of the matrix 4574 . Ao - the off-diagonal portion of the matrix 4575 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4576 - ierr - error code 4577 4578 Level: advanced 4579 4580 Note: 4581 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4582 4583 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4584 M*/ 4585 4586 /*MC 4587 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4588 4589 Synopsis: 4590 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4591 4592 Not Collective 4593 4594 Input Parameters: 4595 + A - the `MATMPIAIJ` matrix 4596 . Ad - the diagonal portion of the matrix 4597 . Ao - the off-diagonal portion of the matrix 4598 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4599 - ierr - error code 4600 4601 Level: advanced 4602 4603 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4604 M*/ 4605 4606 /*@C 4607 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4608 4609 Not Collective 4610 4611 Input Parameter: 4612 . A - The `MATMPIAIJ` matrix 4613 4614 Output Parameters: 4615 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4616 . 
Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4617 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4618 4619 Level: intermediate 4620 4621 Note: 4622 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4623 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4624 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4625 local column numbers to global column numbers in the original matrix. 4626 4627 Fortran Notes: 4628 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4629 4630 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4631 @*/ 4632 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4633 { 4634 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4635 PetscBool flg; 4636 4637 PetscFunctionBegin; 4638 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4639 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4640 if (Ad) *Ad = a->A; 4641 if (Ao) *Ao = a->B; 4642 if (colmap) *colmap = a->garray; 4643 PetscFunctionReturn(PETSC_SUCCESS); 4644 } 4645 4646 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4647 { 4648 PetscInt m, N, i, rstart, nnz, Ii; 4649 PetscInt *indx; 4650 PetscScalar *values; 4651 MatType rootType; 4652 4653 PetscFunctionBegin; 4654 PetscCall(MatGetSize(inmat, &m, &N)); 4655 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4656 PetscInt *dnz, *onz, sum, bs, cbs; 4657 4658 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4659 /* Check sum(n) = N */ 4660 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4661 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4662 4663 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4664 rstart -= m; 4665 4666 MatPreallocateBegin(comm, m, n, dnz, onz); 4667 for (i = 0; i < m; i++) { 4668 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4669 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4670 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4671 } 4672 4673 PetscCall(MatCreate(comm, outmat)); 4674 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4675 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4676 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4677 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4678 PetscCall(MatSetType(*outmat, rootType)); 4679 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4680 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4681 MatPreallocateEnd(dnz, onz); 4682 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4683 } 4684 4685 /* numeric phase */ 4686 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4687 for (i = 0; i < m; i++) { 4688 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4689 Ii = i + rstart; 4690 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4691 
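    /* the row was inserted above at its global position i + rstart; return it to the sequential input matrix before moving on */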
PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4692 } 4693 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4694 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4695 PetscFunctionReturn(PETSC_SUCCESS); 4696 } 4697 4698 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4699 { 4700 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4701 4702 PetscFunctionBegin; 4703 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4704 PetscCall(PetscFree(merge->id_r)); 4705 PetscCall(PetscFree(merge->len_s)); 4706 PetscCall(PetscFree(merge->len_r)); 4707 PetscCall(PetscFree(merge->bi)); 4708 PetscCall(PetscFree(merge->bj)); 4709 PetscCall(PetscFree(merge->buf_ri[0])); 4710 PetscCall(PetscFree(merge->buf_ri)); 4711 PetscCall(PetscFree(merge->buf_rj[0])); 4712 PetscCall(PetscFree(merge->buf_rj)); 4713 PetscCall(PetscFree(merge->coi)); 4714 PetscCall(PetscFree(merge->coj)); 4715 PetscCall(PetscFree(merge->owners_co)); 4716 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4717 PetscCall(PetscFree(merge)); 4718 PetscFunctionReturn(PETSC_SUCCESS); 4719 } 4720 4721 #include <../src/mat/utils/freespace.h> 4722 #include <petscbt.h> 4723 4724 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4725 { 4726 MPI_Comm comm; 4727 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4728 PetscMPIInt size, rank, taga, *len_s; 4729 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4730 PetscMPIInt proc, k; 4731 PetscInt **buf_ri, **buf_rj; 4732 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4733 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4734 MPI_Request *s_waits, *r_waits; 4735 MPI_Status *status; 4736 const MatScalar *aa, *a_a; 4737 MatScalar **abuf_r, *ba_i; 4738 Mat_Merge_SeqsToMPI *merge; 4739 PetscContainer container; 4740 4741 PetscFunctionBegin; 4742 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4743 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4744 4745 PetscCallMPI(MPI_Comm_size(comm, &size)); 4746 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4747 4748 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4749 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4750 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4751 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4752 aa = a_a; 4753 4754 bi = merge->bi; 4755 bj = merge->bj; 4756 buf_ri = merge->buf_ri; 4757 buf_rj = merge->buf_rj; 4758 4759 PetscCall(PetscMalloc1(size, &status)); 4760 owners = merge->rowmap->range; 4761 len_s = merge->len_s; 4762 4763 /* send and recv matrix values */ 4764 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4765 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4766 4767 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4768 for (proc = 0, k = 0; proc < size; proc++) { 4769 if (!len_s[proc]) continue; 4770 i = owners[proc]; 4771 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4772 k++; 4773 } 4774 4775 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4776 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4777 PetscCall(PetscFree(status)); 4778 4779 PetscCall(PetscFree(s_waits)); 4780 PetscCall(PetscFree(r_waits)); 4781 4782 /* insert mat values of mpimat */ 4783 PetscCall(PetscMalloc1(N, &ba_i)); 4784 
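  /* ba_i accumulates the values of one merged row at a time; the buf_ri_k/nextrow/nextai arrays allocated next act as per-message cursors into the received i-structures while the locally owned rows are assembled below */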
PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4785 4786 for (k = 0; k < merge->nrecv; k++) { 4787 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4788 nrows = *buf_ri_k[k]; 4789 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4790 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4791 } 4792 4793 /* set values of ba */ 4794 m = merge->rowmap->n; 4795 for (i = 0; i < m; i++) { 4796 arow = owners[rank] + i; 4797 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4798 bnzi = bi[i + 1] - bi[i]; 4799 PetscCall(PetscArrayzero(ba_i, bnzi)); 4800 4801 /* add local non-zero vals of this proc's seqmat into ba */ 4802 anzi = ai[arow + 1] - ai[arow]; 4803 aj = a->j + ai[arow]; 4804 aa = a_a + ai[arow]; 4805 nextaj = 0; 4806 for (j = 0; nextaj < anzi; j++) { 4807 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4808 ba_i[j] += aa[nextaj++]; 4809 } 4810 } 4811 4812 /* add received vals into ba */ 4813 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4814 /* i-th row */ 4815 if (i == *nextrow[k]) { 4816 anzi = *(nextai[k] + 1) - *nextai[k]; 4817 aj = buf_rj[k] + *nextai[k]; 4818 aa = abuf_r[k] + *nextai[k]; 4819 nextaj = 0; 4820 for (j = 0; nextaj < anzi; j++) { 4821 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4822 ba_i[j] += aa[nextaj++]; 4823 } 4824 } 4825 nextrow[k]++; 4826 nextai[k]++; 4827 } 4828 } 4829 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4830 } 4831 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4832 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4833 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4834 4835 PetscCall(PetscFree(abuf_r[0])); 4836 PetscCall(PetscFree(abuf_r)); 4837 PetscCall(PetscFree(ba_i)); 4838 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4839 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4840 PetscFunctionReturn(PETSC_SUCCESS); 4841 } 4842 4843 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4844 { 4845 Mat B_mpi; 4846 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4847 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4848 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4849 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4850 PetscInt len, *dnz, *onz, bs, cbs; 4851 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4852 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4853 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4854 MPI_Status *status; 4855 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4856 PetscBT lnkbt; 4857 Mat_Merge_SeqsToMPI *merge; 4858 PetscContainer container; 4859 4860 PetscFunctionBegin; 4861 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4862 4863 /* make sure it is a PETSc comm */ 4864 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4865 PetscCallMPI(MPI_Comm_size(comm, &size)); 4866 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4867 4868 PetscCall(PetscNew(&merge)); 4869 PetscCall(PetscMalloc1(size, &status)); 4870 4871 /* determine row ownership */ 4872 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4873 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4874 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4875 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4876 PetscCall(PetscLayoutSetUp(merge->rowmap)); 
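  /* merge->rowmap now fixes which rows of the merged matrix this rank owns; its ownership ranges are used below to size the messages sent to every other rank */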
4877 PetscCall(PetscMalloc1(size, &len_si)); 4878 PetscCall(PetscMalloc1(size, &merge->len_s)); 4879 4880 m = merge->rowmap->n; 4881 owners = merge->rowmap->range; 4882 4883 /* determine the number of messages to send, their lengths */ 4884 len_s = merge->len_s; 4885 4886 len = 0; /* length of buf_si[] */ 4887 merge->nsend = 0; 4888 for (PetscMPIInt proc = 0; proc < size; proc++) { 4889 len_si[proc] = 0; 4890 if (proc == rank) { 4891 len_s[proc] = 0; 4892 } else { 4893 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4894 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4895 } 4896 if (len_s[proc]) { 4897 merge->nsend++; 4898 nrows = 0; 4899 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4900 if (ai[i + 1] > ai[i]) nrows++; 4901 } 4902 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4903 len += len_si[proc]; 4904 } 4905 } 4906 4907 /* determine the number and length of messages to receive for ij-structure */ 4908 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4909 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4910 4911 /* post the Irecv of j-structure */ 4912 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4913 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4914 4915 /* post the Isend of j-structure */ 4916 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4917 4918 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4919 if (!len_s[proc]) continue; 4920 i = owners[proc]; 4921 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4922 k++; 4923 } 4924 4925 /* receives and sends of j-structure are complete */ 4926 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4927 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4928 4929 /* send and recv i-structure */ 4930 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4931 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4932 4933 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4934 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4935 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4936 if (!len_s[proc]) continue; 4937 /* form outgoing message for i-structure: 4938 buf_si[0]: nrows to be sent 4939 [1:nrows]: row index (global) 4940 [nrows+1:2*nrows+1]: i-structure index 4941 */ 4942 nrows = len_si[proc] / 2 - 1; 4943 buf_si_i = buf_si + nrows + 1; 4944 buf_si[0] = nrows; 4945 buf_si_i[0] = 0; 4946 nrows = 0; 4947 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4948 anzi = ai[i + 1] - ai[i]; 4949 if (anzi) { 4950 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4951 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4952 nrows++; 4953 } 4954 } 4955 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4956 k++; 4957 buf_si += len_si[proc]; 4958 } 4959 4960 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4961 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4962 4963 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4964 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], 
merge->len_r[i], merge->id_r[i])); 4965 4966 PetscCall(PetscFree(len_si)); 4967 PetscCall(PetscFree(len_ri)); 4968 PetscCall(PetscFree(rj_waits)); 4969 PetscCall(PetscFree2(si_waits, sj_waits)); 4970 PetscCall(PetscFree(ri_waits)); 4971 PetscCall(PetscFree(buf_s)); 4972 PetscCall(PetscFree(status)); 4973 4974 /* compute a local seq matrix in each processor */ 4975 /* allocate bi array and free space for accumulating nonzero column info */ 4976 PetscCall(PetscMalloc1(m + 1, &bi)); 4977 bi[0] = 0; 4978 4979 /* create and initialize a linked list */ 4980 nlnk = N + 1; 4981 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4982 4983 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4984 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4985 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4986 4987 current_space = free_space; 4988 4989 /* determine symbolic info for each local row */ 4990 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4991 4992 for (k = 0; k < merge->nrecv; k++) { 4993 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4994 nrows = *buf_ri_k[k]; 4995 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4996 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4997 } 4998 4999 MatPreallocateBegin(comm, m, n, dnz, onz); 5000 len = 0; 5001 for (i = 0; i < m; i++) { 5002 bnzi = 0; 5003 /* add local non-zero cols of this proc's seqmat into lnk */ 5004 arow = owners[rank] + i; 5005 anzi = ai[arow + 1] - ai[arow]; 5006 aj = a->j + ai[arow]; 5007 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5008 bnzi += nlnk; 5009 /* add received col data into lnk */ 5010 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5011 if (i == *nextrow[k]) { /* i-th row */ 5012 anzi = *(nextai[k] + 1) - *nextai[k]; 5013 aj = buf_rj[k] + *nextai[k]; 5014 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5015 bnzi += nlnk; 5016 nextrow[k]++; 5017 nextai[k]++; 5018 } 5019 } 5020 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5021 5022 /* if free space is not available, make more free space */ 5023 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5024 /* copy data into free space, then initialize lnk */ 5025 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5026 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5027 5028 current_space->array += bnzi; 5029 current_space->local_used += bnzi; 5030 current_space->local_remaining -= bnzi; 5031 5032 bi[i + 1] = bi[i] + bnzi; 5033 } 5034 5035 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5036 5037 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5038 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5039 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5040 5041 /* create symbolic parallel matrix B_mpi */ 5042 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5043 PetscCall(MatCreate(comm, &B_mpi)); 5044 if (n == PETSC_DECIDE) { 5045 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5046 } else { 5047 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5048 } 5049 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5050 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5051 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5052 MatPreallocateEnd(dnz, onz); 5053 PetscCall(MatSetOption(B_mpi,
MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5054 5055 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5056 B_mpi->assembled = PETSC_FALSE; 5057 merge->bi = bi; 5058 merge->bj = bj; 5059 merge->buf_ri = buf_ri; 5060 merge->buf_rj = buf_rj; 5061 merge->coi = NULL; 5062 merge->coj = NULL; 5063 merge->owners_co = NULL; 5064 5065 PetscCall(PetscCommDestroy(&comm)); 5066 5067 /* attach the supporting struct to B_mpi for reuse */ 5068 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5069 PetscCall(PetscContainerSetPointer(container, merge)); 5070 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5071 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5072 PetscCall(PetscContainerDestroy(&container)); 5073 *mpimat = B_mpi; 5074 5075 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5076 PetscFunctionReturn(PETSC_SUCCESS); 5077 } 5078 5079 /*@ 5080 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5081 matrices from each processor 5082 5083 Collective 5084 5085 Input Parameters: 5086 + comm - the communicators the parallel matrix will live on 5087 . seqmat - the input sequential matrices 5088 . m - number of local rows (or `PETSC_DECIDE`) 5089 . n - number of local columns (or `PETSC_DECIDE`) 5090 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5091 5092 Output Parameter: 5093 . mpimat - the parallel matrix generated 5094 5095 Level: advanced 5096 5097 Note: 5098 The dimensions of the sequential matrix in each processor MUST be the same. 5099 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5100 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5101 5102 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5103 @*/ 5104 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5105 { 5106 PetscMPIInt size; 5107 5108 PetscFunctionBegin; 5109 PetscCallMPI(MPI_Comm_size(comm, &size)); 5110 if (size == 1) { 5111 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5112 if (scall == MAT_INITIAL_MATRIX) { 5113 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5114 } else { 5115 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5116 } 5117 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5118 PetscFunctionReturn(PETSC_SUCCESS); 5119 } 5120 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5121 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5122 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5123 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5124 PetscFunctionReturn(PETSC_SUCCESS); 5125 } 5126 5127 /*@ 5128 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5129 5130 Not Collective 5131 5132 Input Parameter: 5133 . A - the matrix 5134 5135 Output Parameter: 5136 . A_loc - the local sequential matrix generated 5137 5138 Level: developer 5139 5140 Notes: 5141 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5142 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5143 `n` is the global column count obtained with `MatGetSize()` 5144 5145 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
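   For example, a minimal sketch of typical use (assuming `A` is an already assembled `MATAIJ` matrix; the variable name `A_loc` is illustrative) is
.vb
   Mat A_loc;

   MatAIJGetLocalMat(A, &A_loc);
   ... use the sequential matrix A_loc ...
   MatDestroy(&A_loc);
.ve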
5146 5147 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5148 5149 Destroy the matrix with `MatDestroy()` 5150 5151 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5152 @*/ 5153 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5154 { 5155 PetscBool mpi; 5156 5157 PetscFunctionBegin; 5158 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5159 if (mpi) { 5160 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5161 } else { 5162 *A_loc = A; 5163 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5164 } 5165 PetscFunctionReturn(PETSC_SUCCESS); 5166 } 5167 5168 /*@ 5169 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5170 5171 Not Collective 5172 5173 Input Parameters: 5174 + A - the matrix 5175 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5176 5177 Output Parameter: 5178 . A_loc - the local sequential matrix generated 5179 5180 Level: developer 5181 5182 Notes: 5183 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5184 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5185 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5186 5187 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5188 5189 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5190 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5191 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5192 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
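   Example Usage:
   A rough sketch of a typical call sequence, assuming `A` is an assembled `MATMPIAIJ` matrix whose numerical values (but not its nonzero pattern) change between the two calls, is
.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
   ... change the values of A and reassemble it ...
   MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
   MatDestroy(&A_loc);
.ve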
5193 5194 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5195 @*/ 5196 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5197 { 5198 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5199 Mat_SeqAIJ *mat, *a, *b; 5200 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5201 const PetscScalar *aa, *ba, *aav, *bav; 5202 PetscScalar *ca, *cam; 5203 PetscMPIInt size; 5204 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5205 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5206 PetscBool match; 5207 5208 PetscFunctionBegin; 5209 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5210 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5211 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5212 if (size == 1) { 5213 if (scall == MAT_INITIAL_MATRIX) { 5214 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5215 *A_loc = mpimat->A; 5216 } else if (scall == MAT_REUSE_MATRIX) { 5217 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5218 } 5219 PetscFunctionReturn(PETSC_SUCCESS); 5220 } 5221 5222 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5223 a = (Mat_SeqAIJ *)mpimat->A->data; 5224 b = (Mat_SeqAIJ *)mpimat->B->data; 5225 ai = a->i; 5226 aj = a->j; 5227 bi = b->i; 5228 bj = b->j; 5229 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5230 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5231 aa = aav; 5232 ba = bav; 5233 if (scall == MAT_INITIAL_MATRIX) { 5234 PetscCall(PetscMalloc1(1 + am, &ci)); 5235 ci[0] = 0; 5236 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5237 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5238 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5239 k = 0; 5240 for (i = 0; i < am; i++) { 5241 ncols_o = bi[i + 1] - bi[i]; 5242 ncols_d = ai[i + 1] - ai[i]; 5243 /* off-diagonal portion of A */ 5244 for (jo = 0; jo < ncols_o; jo++) { 5245 col = cmap[*bj]; 5246 if (col >= cstart) break; 5247 cj[k] = col; 5248 bj++; 5249 ca[k++] = *ba++; 5250 } 5251 /* diagonal portion of A */ 5252 for (j = 0; j < ncols_d; j++) { 5253 cj[k] = cstart + *aj++; 5254 ca[k++] = *aa++; 5255 } 5256 /* off-diagonal portion of A */ 5257 for (j = jo; j < ncols_o; j++) { 5258 cj[k] = cmap[*bj++]; 5259 ca[k++] = *ba++; 5260 } 5261 } 5262 /* put together the new matrix */ 5263 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5264 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5265 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5266 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5267 mat->free_a = PETSC_TRUE; 5268 mat->free_ij = PETSC_TRUE; 5269 mat->nonew = 0; 5270 } else if (scall == MAT_REUSE_MATRIX) { 5271 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5272 ci = mat->i; 5273 cj = mat->j; 5274 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5275 for (i = 0; i < am; i++) { 5276 /* off-diagonal portion of A */ 5277 ncols_o = bi[i + 1] - bi[i]; 5278 for (jo = 0; jo < ncols_o; jo++) { 5279 col = cmap[*bj]; 5280 if (col >= cstart) break; 5281 *cam++ = *ba++; 5282 bj++; 5283 } 5284 /* diagonal portion of A */ 5285 ncols_d = ai[i + 1] - ai[i]; 5286 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5287 /* off-diagonal portion of A */ 5288 for (j = jo; j < ncols_o; j++) { 5289 *cam++ = *ba++; 5290 bj++; 5291 } 5292 } 5293 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5294 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5295 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5296 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5297 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5298 PetscFunctionReturn(PETSC_SUCCESS); 5299 } 5300 5301 /*@ 5302 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5303 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5304 5305 Not Collective 5306 5307 Input Parameters: 5308 + A - the matrix 5309 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5310 5311 Output Parameters: 5312 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5313 - A_loc - the local sequential matrix generated 5314 5315 Level: developer 5316 5317 Note: 5318 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5319 part, then those associated with the off-diagonal part (in its local ordering) 5320 5321 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5322 @*/ 5323 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5324 { 5325 Mat Ao, Ad; 5326 const PetscInt *cmap; 5327 PetscMPIInt size; 5328 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5329 5330 PetscFunctionBegin; 5331 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5332 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5333 if (size == 1) { 5334 if (scall == MAT_INITIAL_MATRIX) { 5335 PetscCall(PetscObjectReference((PetscObject)Ad)); 5336 *A_loc = Ad; 5337 } else if (scall == MAT_REUSE_MATRIX) { 5338 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5339 } 5340 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5341 PetscFunctionReturn(PETSC_SUCCESS); 5342 } 5343 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5344 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5345 if (f) { 5346 PetscCall((*f)(A, scall, glob, A_loc)); 5347 } else { 5348 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5349 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5350 Mat_SeqAIJ *c; 5351 PetscInt *ai = a->i, *aj = a->j; 5352 PetscInt *bi = b->i, *bj = b->j; 5353 PetscInt *ci, *cj; 5354 const PetscScalar *aa, *ba; 5355 PetscScalar *ca; 5356 PetscInt i, j, am, dn, on; 5357 5358 
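    /* no type-specific merge routine was found, so merge Ad and Ao by hand: within each row the diagonal-block columns keep their local numbering [0,dn) and the off-diagonal columns are shifted by dn */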
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5359 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5360 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5361 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5362 if (scall == MAT_INITIAL_MATRIX) { 5363 PetscInt k; 5364 PetscCall(PetscMalloc1(1 + am, &ci)); 5365 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5366 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5367 ci[0] = 0; 5368 for (i = 0, k = 0; i < am; i++) { 5369 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5370 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5371 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5372 /* diagonal portion of A */ 5373 for (j = 0; j < ncols_d; j++, k++) { 5374 cj[k] = *aj++; 5375 ca[k] = *aa++; 5376 } 5377 /* off-diagonal portion of A */ 5378 for (j = 0; j < ncols_o; j++, k++) { 5379 cj[k] = dn + *bj++; 5380 ca[k] = *ba++; 5381 } 5382 } 5383 /* put together the new matrix */ 5384 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5385 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5386 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5387 c = (Mat_SeqAIJ *)(*A_loc)->data; 5388 c->free_a = PETSC_TRUE; 5389 c->free_ij = PETSC_TRUE; 5390 c->nonew = 0; 5391 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5392 } else if (scall == MAT_REUSE_MATRIX) { 5393 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5394 for (i = 0; i < am; i++) { 5395 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5396 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5397 /* diagonal portion of A */ 5398 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5399 /* off-diagonal portion of A */ 5400 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5401 } 5402 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5403 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5404 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5405 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5406 if (glob) { 5407 PetscInt cst, *gidx; 5408 5409 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5410 PetscCall(PetscMalloc1(dn + on, &gidx)); 5411 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5412 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5413 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5414 } 5415 } 5416 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5417 PetscFunctionReturn(PETSC_SUCCESS); 5418 } 5419 5420 /*@C 5421 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5422 5423 Not Collective 5424 5425 Input Parameters: 5426 + A - the matrix 5427 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5428 . row - index set of rows to extract (or `NULL`) 5429 - col - index set of columns to extract (or `NULL`) 5430 5431 Output Parameter: 5432 . 
A_loc - the local sequential matrix generated 5433 5434 Level: developer 5435 5436 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5437 @*/ 5438 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5439 { 5440 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5441 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5442 IS isrowa, iscola; 5443 Mat *aloc; 5444 PetscBool match; 5445 5446 PetscFunctionBegin; 5447 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5448 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5449 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5450 if (!row) { 5451 start = A->rmap->rstart; 5452 end = A->rmap->rend; 5453 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5454 } else { 5455 isrowa = *row; 5456 } 5457 if (!col) { 5458 start = A->cmap->rstart; 5459 cmap = a->garray; 5460 nzA = a->A->cmap->n; 5461 nzB = a->B->cmap->n; 5462 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5463 ncols = 0; 5464 for (i = 0; i < nzB; i++) { 5465 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5466 else break; 5467 } 5468 imark = i; 5469 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5470 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5471 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5472 } else { 5473 iscola = *col; 5474 } 5475 if (scall != MAT_INITIAL_MATRIX) { 5476 PetscCall(PetscMalloc1(1, &aloc)); 5477 aloc[0] = *A_loc; 5478 } 5479 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5480 if (!col) { /* attach global id of condensed columns */ 5481 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5482 } 5483 *A_loc = aloc[0]; 5484 PetscCall(PetscFree(aloc)); 5485 if (!row) PetscCall(ISDestroy(&isrowa)); 5486 if (!col) PetscCall(ISDestroy(&iscola)); 5487 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5488 PetscFunctionReturn(PETSC_SUCCESS); 5489 } 5490 5491 /* 5492 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5493 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5494 * on a global size. 
5495 * */ 5496 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5497 { 5498 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5499 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5500 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5501 PetscMPIInt owner; 5502 PetscSFNode *iremote, *oiremote; 5503 const PetscInt *lrowindices; 5504 PetscSF sf, osf; 5505 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5506 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5507 MPI_Comm comm; 5508 ISLocalToGlobalMapping mapping; 5509 const PetscScalar *pd_a, *po_a; 5510 5511 PetscFunctionBegin; 5512 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5513 /* plocalsize is the number of roots 5514 * nrows is the number of leaves 5515 * */ 5516 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5517 PetscCall(ISGetLocalSize(rows, &nrows)); 5518 PetscCall(PetscCalloc1(nrows, &iremote)); 5519 PetscCall(ISGetIndices(rows, &lrowindices)); 5520 for (i = 0; i < nrows; i++) { 5521 /* Find a remote index and an owner for a row 5522 * The row could be local or remote 5523 * */ 5524 owner = 0; 5525 lidx = 0; 5526 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5527 iremote[i].index = lidx; 5528 iremote[i].rank = owner; 5529 } 5530 /* Create SF to communicate how many nonzero columns for each row */ 5531 PetscCall(PetscSFCreate(comm, &sf)); 5532 /* SF will figure out the number of nonzero columns for each row, and their 5533 * offsets 5534 * */ 5535 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5536 PetscCall(PetscSFSetFromOptions(sf)); 5537 PetscCall(PetscSFSetUp(sf)); 5538 5539 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5540 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5541 PetscCall(PetscCalloc1(nrows, &pnnz)); 5542 roffsets[0] = 0; 5543 roffsets[1] = 0; 5544 for (i = 0; i < plocalsize; i++) { 5545 /* diagonal */ 5546 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5547 /* off-diagonal */ 5548 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5549 /* compute offsets so that we relative location for each row */ 5550 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5551 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5552 } 5553 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5554 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5555 /* 'r' means root, and 'l' means leaf */ 5556 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5557 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5558 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5559 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5560 PetscCall(PetscSFDestroy(&sf)); 5561 PetscCall(PetscFree(roffsets)); 5562 PetscCall(PetscFree(nrcols)); 5563 dntotalcols = 0; 5564 ontotalcols = 0; 5565 ncol = 0; 5566 for (i = 0; i < nrows; i++) { 5567 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5568 ncol = PetscMax(pnnz[i], ncol); 5569 /* diagonal */ 5570 dntotalcols += nlcols[i * 2 + 0]; 5571 /* off-diagonal */ 5572 ontotalcols += nlcols[i * 2 + 1]; 5573 } 5574 /* We do not need to figure the right number of columns 5575 * since all the calculations will be done by going through the raw data 5576 * */ 5577 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5578 PetscCall(MatSetUp(*P_oth)); 5579 
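  /* P_oth now has the requested row sizes; the star forests created below (sf for the diagonal part of P, osf for the off-diagonal part) scatter P's column indices and values directly into P_oth's internal arrays */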
PetscCall(PetscFree(pnnz)); 5580 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5581 /* diagonal */ 5582 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5583 /* off-diagonal */ 5584 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5585 /* diagonal */ 5586 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5587 /* off-diagonal */ 5588 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5589 dntotalcols = 0; 5590 ontotalcols = 0; 5591 ntotalcols = 0; 5592 for (i = 0; i < nrows; i++) { 5593 owner = 0; 5594 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5595 /* Set iremote for diag matrix */ 5596 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5597 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5598 iremote[dntotalcols].rank = owner; 5599 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5600 ilocal[dntotalcols++] = ntotalcols++; 5601 } 5602 /* off-diagonal */ 5603 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5604 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5605 oiremote[ontotalcols].rank = owner; 5606 oilocal[ontotalcols++] = ntotalcols++; 5607 } 5608 } 5609 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5610 PetscCall(PetscFree(loffsets)); 5611 PetscCall(PetscFree(nlcols)); 5612 PetscCall(PetscSFCreate(comm, &sf)); 5613 /* P serves as roots and P_oth is leaves 5614 * Diag matrix 5615 * */ 5616 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5617 PetscCall(PetscSFSetFromOptions(sf)); 5618 PetscCall(PetscSFSetUp(sf)); 5619 5620 PetscCall(PetscSFCreate(comm, &osf)); 5621 /* off-diagonal */ 5622 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5623 PetscCall(PetscSFSetFromOptions(osf)); 5624 PetscCall(PetscSFSetUp(osf)); 5625 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5626 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5627 /* operate on the matrix internal data to save memory */ 5628 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5629 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5630 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5631 /* Convert to global indices for diag matrix */ 5632 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5633 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5634 /* We want P_oth store global indices */ 5635 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5636 /* Use memory scalable approach */ 5637 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5638 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5639 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5640 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5641 /* Convert back to local indices */ 5642 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5643 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5644 nout = 0; 5645 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5646 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5647 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5648 /* Exchange values */ 5649 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5650 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5651 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5652 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5653 /* Stop PETSc from shrinking memory */ 5654 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5655 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5656 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5657 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5658 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5659 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5660 PetscCall(PetscSFDestroy(&sf)); 5661 PetscCall(PetscSFDestroy(&osf)); 5662 PetscFunctionReturn(PETSC_SUCCESS); 5663 } 5664 5665 /* 5666 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5667 * This supports MPIAIJ and MAIJ 5668 * */ 5669 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5670 { 5671 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5672 Mat_SeqAIJ *p_oth; 5673 IS rows, map; 5674 PetscHMapI hamp; 5675 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5676 MPI_Comm comm; 5677 PetscSF sf, osf; 5678 PetscBool has; 5679 5680 PetscFunctionBegin; 5681 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5682 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5683 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5684 * and then create a submatrix (that often is an overlapping matrix) 5685 * */ 5686 if (reuse == MAT_INITIAL_MATRIX) { 5687 /* Use a hash table to figure out unique keys */ 5688 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5689 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5690 count = 0; 5691 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5692 for (i = 0; i < a->B->cmap->n; i++) { 5693 key = a->garray[i] / dof; 5694 PetscCall(PetscHMapIHas(hamp, key, &has)); 5695 if (!has) { 5696 mapping[i] = count; 5697 PetscCall(PetscHMapISet(hamp, key, count++)); 5698 } else { 5699 /* Current 'i' has the same value the previous step */ 5700 mapping[i] = count - 1; 5701 } 5702 } 5703 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5704 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5705 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5706 PetscCall(PetscCalloc1(htsize, &rowindices)); 5707 off = 0; 5708 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5709 PetscCall(PetscHMapIDestroy(&hamp)); 5710 PetscCall(PetscSortInt(htsize, rowindices)); 5711 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5712 /* In case, the matrix was already created but users want to recreate the matrix */ 5713 PetscCall(MatDestroy(P_oth)); 5714 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5715 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5716 PetscCall(ISDestroy(&map)); 5717 PetscCall(ISDestroy(&rows)); 5718 } else if (reuse == MAT_REUSE_MATRIX) { 5719 /* If matrix was already created, we simply update values using SF objects 5720 * that as attached to the matrix earlier. 
5721 */ 5722 const PetscScalar *pd_a, *po_a; 5723 5724 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5725 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5726 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5727 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5728 /* Update values in place */ 5729 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5730 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5731 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5732 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5733 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5734 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5735 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5736 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5737 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5738 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5739 PetscFunctionReturn(PETSC_SUCCESS); 5740 } 5741 5742 /*@C 5743 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5744 5745 Collective 5746 5747 Input Parameters: 5748 + A - the first matrix in `MATMPIAIJ` format 5749 . B - the second matrix in `MATMPIAIJ` format 5750 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5751 5752 Output Parameters: 5753 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5754 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5755 - B_seq - the sequential matrix generated 5756 5757 Level: developer 5758 5759 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5760 @*/ 5761 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5762 { 5763 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5764 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5765 IS isrowb, iscolb; 5766 Mat *bseq = NULL; 5767 5768 PetscFunctionBegin; 5769 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5770 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5771 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5772 5773 if (scall == MAT_INITIAL_MATRIX) { 5774 start = A->cmap->rstart; 5775 cmap = a->garray; 5776 nzA = a->A->cmap->n; 5777 nzB = a->B->cmap->n; 5778 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5779 ncols = 0; 5780 for (i = 0; i < nzB; i++) { /* row < local row index */ 5781 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5782 else break; 5783 } 5784 imark = i; 5785 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5786 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5787 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5788 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5789 } else { 5790 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5791 isrowb = *rowb; 5792 iscolb = *colb; 5793 PetscCall(PetscMalloc1(1, &bseq)); 5794 bseq[0] = *B_seq; 5795 } 5796 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5797 *B_seq = bseq[0]; 5798 PetscCall(PetscFree(bseq)); 
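  /* hand the index sets back to the caller when requested, otherwise destroy the ones created here */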
5799 if (!rowb) { 5800 PetscCall(ISDestroy(&isrowb)); 5801 } else { 5802 *rowb = isrowb; 5803 } 5804 if (!colb) { 5805 PetscCall(ISDestroy(&iscolb)); 5806 } else { 5807 *colb = iscolb; 5808 } 5809 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5810 PetscFunctionReturn(PETSC_SUCCESS); 5811 } 5812 5813 /* 5814 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5815 of the OFF-DIAGONAL portion of local A 5816 5817 Collective 5818 5819 Input Parameters: 5820 + A,B - the matrices in `MATMPIAIJ` format 5821 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5822 5823 Output Parameter: 5824 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5825 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5826 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5827 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5828 5829 Developer Note: 5830 This directly accesses information inside the VecScatter associated with the matrix-vector product 5831 for this matrix. This is not desirable.. 5832 5833 Level: developer 5834 5835 */ 5836 5837 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5838 { 5839 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5840 VecScatter ctx; 5841 MPI_Comm comm; 5842 const PetscMPIInt *rprocs, *sprocs; 5843 PetscMPIInt nrecvs, nsends; 5844 const PetscInt *srow, *rstarts, *sstarts; 5845 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5846 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5847 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5848 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5849 PetscMPIInt size, tag, rank, nreqs; 5850 5851 PetscFunctionBegin; 5852 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5853 PetscCallMPI(MPI_Comm_size(comm, &size)); 5854 5855 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5856 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5857 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5858 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5859 5860 if (size == 1) { 5861 startsj_s = NULL; 5862 bufa_ptr = NULL; 5863 *B_oth = NULL; 5864 PetscFunctionReturn(PETSC_SUCCESS); 5865 } 5866 5867 ctx = a->Mvctx; 5868 tag = ((PetscObject)ctx)->tag; 5869 5870 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5871 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5872 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5873 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5874 PetscCall(PetscMalloc1(nreqs, &reqs)); 5875 rwaits = reqs; 5876 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5877 5878 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5879 if (scall == MAT_INITIAL_MATRIX) { 5880 /* i-array */ 5881 /* post receives */ 5882 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5883 for (i = 0; i < nrecvs; i++) { 5884 rowlen = rvalues + rstarts[i] * rbs; 5885 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5886 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5887 } 5888 5889 /* pack the outgoing message */ 5890 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5891 5892 sstartsj[0] = 0; 5893 rstartsj[0] = 0; 5894 len = 0; /* total length of j or a array to be sent */ 5895 if (nsends) { 5896 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5897 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5898 } 5899 for (i = 0; i < nsends; i++) { 5900 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5901 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5902 for (j = 0; j < nrows; j++) { 5903 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5904 for (l = 0; l < sbs; l++) { 5905 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5906 5907 rowlen[j * sbs + l] = ncols; 5908 5909 len += ncols; 5910 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5911 } 5912 k++; 5913 } 5914 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5915 5916 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5917 } 5918 /* recvs and sends of i-array are completed */ 5919 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5920 PetscCall(PetscFree(svalues)); 5921 5922 /* allocate buffers for sending j and a arrays */ 5923 PetscCall(PetscMalloc1(len + 1, &bufj)); 5924 PetscCall(PetscMalloc1(len + 1, &bufa)); 5925 5926 /* create i-array of B_oth */ 5927 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5928 5929 b_othi[0] = 0; 5930 len = 0; /* total length of j or a array to be received */ 5931 k = 0; 5932 for (i = 0; i < nrecvs; i++) { 5933 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5934 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5935 for (j = 0; j < nrows; j++) { 5936 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5937 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5938 k++; 5939 } 5940 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5941 } 5942 PetscCall(PetscFree(rvalues)); 5943 5944 /* allocate space for j and a arrays of B_oth */ 5945 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5946 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5947 5948 /* j-array */ 5949 /* post receives of j-array */ 5950 for (i = 0; i < nrecvs; i++) { 5951 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5952 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5953 } 5954 5955 /* pack the outgoing message j-array */ 5956 if (nsends) k = sstarts[0]; 5957 for (i = 0; i < nsends; i++) { 5958 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5959 bufJ = bufj + sstartsj[i]; 5960 for (j = 0; j < nrows; j++) { 5961 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5962 for (ll = 0; ll < sbs; ll++) { 5963 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5964 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5965 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5966 } 5967 } 5968 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5969 } 5970 5971 /* recvs and sends of j-array are completed */ 5972 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5973 } else if (scall == MAT_REUSE_MATRIX) { 5974 sstartsj = *startsj_s; 5975 rstartsj = *startsj_r; 5976 bufa = *bufa_ptr; 5977 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5978 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5979 5980 /* a-array */ 5981 /* post receives of a-array */ 5982 for (i = 0; i < nrecvs; i++) { 5983 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5984 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5985 } 5986 5987 /* pack the outgoing message a-array */ 5988 if (nsends) k = sstarts[0]; 5989 for (i = 0; i < nsends; i++) { 5990 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5991 bufA = bufa + sstartsj[i]; 5992 for (j = 0; j < nrows; j++) { 5993 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5994 for (ll = 0; ll < sbs; ll++) { 5995 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5996 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5997 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5998 } 5999 } 6000 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6001 } 6002 /* recvs and sends of a-array are completed */ 6003 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6004 PetscCall(PetscFree(reqs)); 6005 6006 if (scall == MAT_INITIAL_MATRIX) { 6007 Mat_SeqAIJ *b_oth; 6008 6009 /* put together the new matrix */ 6010 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6011 6012 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6013 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6014 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6015 b_oth->free_a = PETSC_TRUE; 6016 b_oth->free_ij = PETSC_TRUE; 6017 b_oth->nonew = 0; 6018 6019 PetscCall(PetscFree(bufj)); 6020 if (!startsj_s || !bufa_ptr) { 6021 PetscCall(PetscFree2(sstartsj, rstartsj)); 6022 PetscCall(PetscFree(bufa_ptr)); 6023 } else { 6024 *startsj_s = sstartsj; 6025 *startsj_r = rstartsj; 6026 *bufa_ptr = bufa; 6027 } 6028 } else if (scall == MAT_REUSE_MATRIX) { 6029 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6030 } 6031 6032 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6033 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6034 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6035 PetscFunctionReturn(PETSC_SUCCESS); 6036 } 6037 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6041 #if defined(PETSC_HAVE_MKL_SPARSE) 6042 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6043 #endif 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6046 #if defined(PETSC_HAVE_ELEMENTAL) 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 #if defined(PETSC_HAVE_SCALAPACK) 6050 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6051 #endif 6052 #if defined(PETSC_HAVE_HYPRE) 6053 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6054 #endif 6055 #if defined(PETSC_HAVE_CUDA) 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6057 #endif 6058 #if defined(PETSC_HAVE_HIP) 6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6060 #endif 6061 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6062 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6063 #endif 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6065 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6066 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6067 6068 /* 6069 Computes (B'*A')' since computing B*A directly is untenable 6070 6071 n p p 6072 [ ] [ ] [ ] 6073 m [ A ] * n [ B ] = m [ C ] 6074 [ ] [ ] [ ] 6075 6076 */ 6077 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6078 { 6079 Mat At, Bt, Ct; 6080 6081 PetscFunctionBegin; 6082 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6083 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6084 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6085 PetscCall(MatDestroy(&At)); 6086 PetscCall(MatDestroy(&Bt)); 6087 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6088 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6089 PetscCall(MatDestroy(&Ct)); 6090 PetscFunctionReturn(PETSC_SUCCESS); 6091 } 6092 6093 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6094 { 6095 PetscBool cisdense; 6096 6097 PetscFunctionBegin; 6098 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6099 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6100 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6101 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6102 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6103 PetscCall(MatSetUp(C)); 6104
6105 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6106 PetscFunctionReturn(PETSC_SUCCESS); 6107 } 6108
6109 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6110 { 6111 Mat_Product *product = C->product; 6112 Mat A = product->A, B = product->B; 6113
6114 PetscFunctionBegin; 6115 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6116 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6117 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6118 C->ops->productsymbolic = MatProductSymbolic_AB; 6119 PetscFunctionReturn(PETSC_SUCCESS); 6120 } 6121
6122 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6123 { 6124 Mat_Product *product = C->product; 6125
6126 PetscFunctionBegin; 6127 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6128 PetscFunctionReturn(PETSC_SUCCESS); 6129 } 6130
6131 /* 6132 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6133
6134 Input Parameters: 6135
6136 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6137 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6138
6139 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6140
6141 For Set1, j1[] contains column indices of the nonzeros. 6142 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6143 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6144 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6145
6146 The same conventions hold for Set2. 6147
6148 This routine merges the two sets of nonzeros row by row and removes repeats. 6149
6150 Output Parameters: (memory is allocated by the caller) 6151
6152 i[],j[]: the CSR of the merged matrix, which has m rows. 6153 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to the imap1[k]-th unique nonzero in the merged matrix. 6154 imap2[]: similar to imap1[], but for Set2. 6155 Note we order nonzeros row-by-row and from left to right.
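   An illustrative example (data made up for this comment, not taken from any caller), with a single local row (m = 1):
     Set1: j1 = [2,2,5],   rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]   (column 2 occurs twice, column 5 once)
     Set2: j2 = [1,5,5,7], rowBegin2 = [0], rowEnd2 = [4], jmap2 = [0,1,3,4]
   then the merged CSR and the maps are
     i = [0,4], j = [1,2,5,7], imap1 = [1,2], imap2 = [0,2,3]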
6156 */ 6157 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6158 { 6159 PetscInt r, m; /* Row index of mat */ 6160 PetscCount t, t1, t2, b1, e1, b2, e2; 6161 6162 PetscFunctionBegin; 6163 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6164 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6165 i[0] = 0; 6166 for (r = 0; r < m; r++) { /* Do row by row merging */ 6167 b1 = rowBegin1[r]; 6168 e1 = rowEnd1[r]; 6169 b2 = rowBegin2[r]; 6170 e2 = rowEnd2[r]; 6171 while (b1 < e1 && b2 < e2) { 6172 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6173 j[t] = j1[b1]; 6174 imap1[t1] = t; 6175 imap2[t2] = t; 6176 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6177 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6178 t1++; 6179 t2++; 6180 t++; 6181 } else if (j1[b1] < j2[b2]) { 6182 j[t] = j1[b1]; 6183 imap1[t1] = t; 6184 b1 += jmap1[t1 + 1] - jmap1[t1]; 6185 t1++; 6186 t++; 6187 } else { 6188 j[t] = j2[b2]; 6189 imap2[t2] = t; 6190 b2 += jmap2[t2 + 1] - jmap2[t2]; 6191 t2++; 6192 t++; 6193 } 6194 } 6195 /* Merge the remaining in either j1[] or j2[] */ 6196 while (b1 < e1) { 6197 j[t] = j1[b1]; 6198 imap1[t1] = t; 6199 b1 += jmap1[t1 + 1] - jmap1[t1]; 6200 t1++; 6201 t++; 6202 } 6203 while (b2 < e2) { 6204 j[t] = j2[b2]; 6205 imap2[t2] = t; 6206 b2 += jmap2[t2 + 1] - jmap2[t2]; 6207 t2++; 6208 t++; 6209 } 6210 PetscCall(PetscIntCast(t, i + r + 1)); 6211 } 6212 PetscFunctionReturn(PETSC_SUCCESS); 6213 } 6214 6215 /* 6216 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6217 6218 Input Parameters: 6219 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6220 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6221 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6222 6223 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6224 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6225 6226 Output Parameters: 6227 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6228 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6229 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6230 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6231 6232 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6233 Atot: number of entries belonging to the diagonal block. 6234 Annz: number of unique nonzeros belonging to the diagonal block. 6235 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6236 repeats (i.e., same 'i,j' pair). 6237 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6238 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6239 6240 Atot: number of entries belonging to the diagonal block 6241 Annz: number of unique nonzeros belonging to the diagonal block. 6242 6243 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6244 6245 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6246 */ 6247 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6248 { 6249 PetscInt cstart, cend, rstart, rend, row, col; 6250 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6251 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6252 PetscCount k, m, p, q, r, s, mid; 6253 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6254 6255 PetscFunctionBegin; 6256 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6257 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6258 m = rend - rstart; 6259 6260 /* Skip negative rows */ 6261 for (k = 0; k < n; k++) 6262 if (i[k] >= 0) break; 6263 6264 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6265 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6266 */ 6267 while (k < n) { 6268 row = i[k]; 6269 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6270 for (s = k; s < n; s++) 6271 if (i[s] != row) break; 6272 6273 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6274 for (p = k; p < s; p++) { 6275 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6276 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6277 } 6278 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6279 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6280 rowBegin[row - rstart] = k; 6281 rowMid[row - rstart] = mid; 6282 rowEnd[row - rstart] = s; 6283 6284 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6285 Atot += mid - k; 6286 Btot += s - mid; 6287 6288 /* Count unique nonzeros of this diag row */ 6289 for (p = k; p < mid;) { 6290 col = j[p]; 6291 do { 6292 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6293 p++; 6294 } while (p < mid && j[p] == col); 6295 Annz++; 6296 } 6297 6298 /* Count unique nonzeros of this offdiag row */ 6299 for (p = mid; p < s;) { 6300 col = j[p]; 6301 do { 6302 p++; 6303 } while (p < s && j[p] == col); 6304 Bnnz++; 6305 } 6306 k = s; 6307 } 6308 6309 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6310 PetscCall(PetscMalloc1(Atot, &Aperm)); 6311 PetscCall(PetscMalloc1(Btot, &Bperm)); 6312 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6313 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6314 6315 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6316 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6317 for (r = 0; r < m; r++) { 6318 k = rowBegin[r]; 6319 mid 
= rowMid[r]; 6320 s = rowEnd[r]; 6321 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6322 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6323 Atot += mid - k; 6324 Btot += s - mid; 6325 6326 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6327 for (p = k; p < mid;) { 6328 col = j[p]; 6329 q = p; 6330 do { 6331 p++; 6332 } while (p < mid && j[p] == col); 6333 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6334 Annz++; 6335 } 6336 6337 for (p = mid; p < s;) { 6338 col = j[p]; 6339 q = p; 6340 do { 6341 p++; 6342 } while (p < s && j[p] == col); 6343 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6344 Bnnz++; 6345 } 6346 } 6347 /* Output */ 6348 *Aperm_ = Aperm; 6349 *Annz_ = Annz; 6350 *Atot_ = Atot; 6351 *Ajmap_ = Ajmap; 6352 *Bperm_ = Bperm; 6353 *Bnnz_ = Bnnz; 6354 *Btot_ = Btot; 6355 *Bjmap_ = Bjmap; 6356 PetscFunctionReturn(PETSC_SUCCESS); 6357 } 6358 6359 /* 6360 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6361 6362 Input Parameters: 6363 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6364 nnz: number of unique nonzeros in the merged matrix 6365 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6366 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6367 6368 Output Parameter: (memory is allocated by the caller) 6369 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6370 6371 Example: 6372 nnz1 = 4 6373 nnz = 6 6374 imap = [1,3,4,5] 6375 jmap = [0,3,5,6,7] 6376 then, 6377 jmap_new = [0,0,3,3,5,6,7] 6378 */ 6379 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6380 { 6381 PetscCount k, p; 6382 6383 PetscFunctionBegin; 6384 jmap_new[0] = 0; 6385 p = nnz; /* p loops over jmap_new[] backwards */ 6386 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6387 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6388 } 6389 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6390 PetscFunctionReturn(PETSC_SUCCESS); 6391 } 6392 6393 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6394 { 6395 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6396 6397 PetscFunctionBegin; 6398 PetscCall(PetscSFDestroy(&coo->sf)); 6399 PetscCall(PetscFree(coo->Aperm1)); 6400 PetscCall(PetscFree(coo->Bperm1)); 6401 PetscCall(PetscFree(coo->Ajmap1)); 6402 PetscCall(PetscFree(coo->Bjmap1)); 6403 PetscCall(PetscFree(coo->Aimap2)); 6404 PetscCall(PetscFree(coo->Bimap2)); 6405 PetscCall(PetscFree(coo->Aperm2)); 6406 PetscCall(PetscFree(coo->Bperm2)); 6407 PetscCall(PetscFree(coo->Ajmap2)); 6408 PetscCall(PetscFree(coo->Bjmap2)); 6409 PetscCall(PetscFree(coo->Cperm1)); 6410 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6411 PetscCall(PetscFree(coo)); 6412 PetscFunctionReturn(PETSC_SUCCESS); 6413 } 6414 6415 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6416 { 6417 MPI_Comm comm; 6418 PetscMPIInt rank, size; 6419 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6420 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6421 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6422 PetscContainer container; 6423 MatCOOStruct_MPIAIJ 
*coo; 6424 6425 PetscFunctionBegin; 6426 PetscCall(PetscFree(mpiaij->garray)); 6427 PetscCall(VecDestroy(&mpiaij->lvec)); 6428 #if defined(PETSC_USE_CTABLE) 6429 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6430 #else 6431 PetscCall(PetscFree(mpiaij->colmap)); 6432 #endif 6433 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6434 mat->assembled = PETSC_FALSE; 6435 mat->was_assembled = PETSC_FALSE; 6436 6437 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6438 PetscCallMPI(MPI_Comm_size(comm, &size)); 6439 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6440 PetscCall(PetscLayoutSetUp(mat->rmap)); 6441 PetscCall(PetscLayoutSetUp(mat->cmap)); 6442 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6443 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6444 PetscCall(MatGetLocalSize(mat, &m, &n)); 6445 PetscCall(MatGetSize(mat, &M, &N)); 6446 6447 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6448 /* entries come first, then local rows, then remote rows. */ 6449 PetscCount n1 = coo_n, *perm1; 6450 PetscInt *i1 = coo_i, *j1 = coo_j; 6451 6452 PetscCall(PetscMalloc1(n1, &perm1)); 6453 for (k = 0; k < n1; k++) perm1[k] = k; 6454 6455 /* Manipulate indices so that entries with negative row or col indices will have smallest 6456 row indices, local entries will have greater but negative row indices, and remote entries 6457 will have positive row indices. 6458 */ 6459 for (k = 0; k < n1; k++) { 6460 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6461 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6462 else { 6463 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6464 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6465 } 6466 } 6467 6468 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6469 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6470 6471 /* Advance k to the first entry we need to take care of */ 6472 for (k = 0; k < n1; k++) 6473 if (i1[k] > PETSC_INT_MIN) break; 6474 PetscCount i1start = k; 6475 6476 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6477 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6478 6479 /* Send remote rows to their owner */ 6480 /* Find which rows should be sent to which remote ranks*/ 6481 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6482 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6483 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6484 const PetscInt *ranges; 6485 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6486
6487 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6488 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6489 for (k = rem; k < n1;) { 6490 PetscMPIInt owner; 6491 PetscInt firstRow, lastRow; 6492
6493 /* Locate a row range */ 6494 firstRow = i1[k]; /* first row of this owner */ 6495 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6496 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6497
6498 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6499 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6500
6501 /* All entries in [k,p) belong to this remote owner */ 6502 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6503 PetscMPIInt *sendto2; 6504 PetscInt *nentries2; 6505 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6506
6507 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6508 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6509 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6510 PetscCall(PetscFree2(sendto, nentries)); 6511 sendto = sendto2; 6512 nentries = nentries2; 6513 maxNsend = maxNsend2; 6514 } 6515 sendto[nsend] = owner; 6516 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6517 nsend++; 6518 k = p; 6519 } 6520
6521 /* Build 1st SF to know offsets on remote to send data */ 6522 PetscSF sf1; 6523 PetscInt nroots = 1, nroots2 = 0; 6524 PetscInt nleaves = nsend, nleaves2 = 0; 6525 PetscInt *offsets; 6526 PetscSFNode *iremote; 6527
6528 PetscCall(PetscSFCreate(comm, &sf1)); 6529 PetscCall(PetscMalloc1(nsend, &iremote)); 6530 PetscCall(PetscMalloc1(nsend, &offsets)); 6531 for (k = 0; k < nsend; k++) { 6532 iremote[k].rank = sendto[k]; 6533 iremote[k].index = 0; 6534 nleaves2 += nentries[k]; 6535 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6536 } 6537 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6538 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6539 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6540 PetscCall(PetscSFDestroy(&sf1)); 6541 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6542
6543 /* Build 2nd SF to send remote COOs to their owner */ 6544 PetscSF sf2; 6545 nroots = nroots2; 6546 nleaves = nleaves2; 6547 PetscCall(PetscSFCreate(comm, &sf2)); 6548 PetscCall(PetscSFSetFromOptions(sf2)); 6549 PetscCall(PetscMalloc1(nleaves, &iremote)); 6550 p = 0; 6551 for (k = 0; k < nsend; k++) { 6552 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6553 for (q = 0; q < nentries[k]; q++, p++) { 6554 iremote[p].rank = sendto[k]; 6555 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6556 } 6557 } 6558 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6559
6560 /* Send the remote COOs to their owner */ 6561 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6562
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6563 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6564 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6565 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6566 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6567 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6568 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6569 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6570 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6571 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6572 6573 PetscCall(PetscFree(offsets)); 6574 PetscCall(PetscFree2(sendto, nentries)); 6575 6576 /* Sort received COOs by row along with the permutation array */ 6577 for (k = 0; k < n2; k++) perm2[k] = k; 6578 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6579 6580 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6581 PetscCount *Cperm1; 6582 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6583 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6584 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6585 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6586 6587 /* Support for HYPRE matrices, kind of a hack. 6588 Swap min column with diagonal so that diagonal values will go first */ 6589 PetscBool hypre; 6590 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6591 if (hypre) { 6592 PetscInt *minj; 6593 PetscBT hasdiag; 6594 6595 PetscCall(PetscBTCreate(m, &hasdiag)); 6596 PetscCall(PetscMalloc1(m, &minj)); 6597 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6598 for (k = i1start; k < rem; k++) { 6599 if (j1[k] < cstart || j1[k] >= cend) continue; 6600 const PetscInt rindex = i1[k] - rstart; 6601 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6602 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6603 } 6604 for (k = 0; k < n2; k++) { 6605 if (j2[k] < cstart || j2[k] >= cend) continue; 6606 const PetscInt rindex = i2[k] - rstart; 6607 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6608 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6609 } 6610 for (k = i1start; k < rem; k++) { 6611 const PetscInt rindex = i1[k] - rstart; 6612 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6613 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6614 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6615 } 6616 for (k = 0; k < n2; k++) { 6617 const PetscInt rindex = i2[k] - rstart; 6618 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6619 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6620 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6621 } 6622 PetscCall(PetscBTDestroy(&hasdiag)); 6623 PetscCall(PetscFree(minj)); 6624 } 6625 6626 /* Split local COOs and received COOs into diag/offdiag portions */ 6627 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6628 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6629 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6630 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6631 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6632 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6633 6634 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6635 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6636 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6637 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6638 6639 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6640 PetscInt *Ai, *Bi; 6641 PetscInt *Aj, *Bj; 6642 6643 PetscCall(PetscMalloc1(m + 1, &Ai)); 6644 PetscCall(PetscMalloc1(m + 1, &Bi)); 6645 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6646 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6647 6648 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6649 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6650 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6651 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6652 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6653 6654 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6655 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6656 6657 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6658 /* expect nonzeros in A/B most likely have local contributing entries */ 6659 PetscInt Annz = Ai[m]; 6660 PetscInt Bnnz = Bi[m]; 6661 PetscCount *Ajmap1_new, *Bjmap1_new; 6662 6663 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6664 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6665 6666 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6667 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6668 6669 PetscCall(PetscFree(Aimap1)); 6670 PetscCall(PetscFree(Ajmap1)); 6671 PetscCall(PetscFree(Bimap1)); 6672 PetscCall(PetscFree(Bjmap1)); 6673 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6674 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6675 PetscCall(PetscFree(perm1)); 6676 PetscCall(PetscFree3(i2, j2, perm2)); 6677 6678 Ajmap1 = Ajmap1_new; 6679 Bjmap1 = Bjmap1_new; 6680 6681 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6682 if (Annz < Annz1 + Annz2) { 6683 PetscInt *Aj_new; 6684 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6685 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6686 PetscCall(PetscFree(Aj)); 6687 Aj = Aj_new; 6688 } 6689 6690 if (Bnnz < Bnnz1 + Bnnz2) { 6691 PetscInt *Bj_new; 6692 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6693 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6694 PetscCall(PetscFree(Bj)); 6695 Bj = Bj_new; 6696 } 6697 6698 /* Create new submatrices for on-process and off-process coupling */ 6699 PetscScalar *Aa, *Ba; 6700 MatType rtype; 6701 Mat_SeqAIJ *a, *b; 6702 PetscObjectState state; 6703 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6704 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6705 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6706 if (cstart) { 6707 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6708 } 6709 6710 PetscCall(MatGetRootType_Private(mat, &rtype)); 6711 6712 
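  /* Rebuild the diagonal block (mpiaij->A) and off-diagonal block (mpiaij->B) from the merged CSR arrays computed above,
     carrying over the SeqXAIJ options of the old blocks; both blocks are converted back to the root type (e.g., a device type)
     after the multiply setup below */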
MatSeqXAIJGetOptions_Private(mpiaij->A); 6713 PetscCall(MatDestroy(&mpiaij->A)); 6714 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6715 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6716 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6717 6718 MatSeqXAIJGetOptions_Private(mpiaij->B); 6719 PetscCall(MatDestroy(&mpiaij->B)); 6720 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6721 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6722 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6723 6724 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6725 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6726 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6727 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6728 6729 a = (Mat_SeqAIJ *)mpiaij->A->data; 6730 b = (Mat_SeqAIJ *)mpiaij->B->data; 6731 a->free_a = PETSC_TRUE; 6732 a->free_ij = PETSC_TRUE; 6733 b->free_a = PETSC_TRUE; 6734 b->free_ij = PETSC_TRUE; 6735 a->maxnz = a->nz; 6736 b->maxnz = b->nz; 6737 6738 /* conversion must happen AFTER multiply setup */ 6739 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6740 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6741 PetscCall(VecDestroy(&mpiaij->lvec)); 6742 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6743 6744 // Put the COO struct in a container and then attach that to the matrix 6745 PetscCall(PetscMalloc1(1, &coo)); 6746 coo->n = coo_n; 6747 coo->sf = sf2; 6748 coo->sendlen = nleaves; 6749 coo->recvlen = nroots; 6750 coo->Annz = Annz; 6751 coo->Bnnz = Bnnz; 6752 coo->Annz2 = Annz2; 6753 coo->Bnnz2 = Bnnz2; 6754 coo->Atot1 = Atot1; 6755 coo->Atot2 = Atot2; 6756 coo->Btot1 = Btot1; 6757 coo->Btot2 = Btot2; 6758 coo->Ajmap1 = Ajmap1; 6759 coo->Aperm1 = Aperm1; 6760 coo->Bjmap1 = Bjmap1; 6761 coo->Bperm1 = Bperm1; 6762 coo->Aimap2 = Aimap2; 6763 coo->Ajmap2 = Ajmap2; 6764 coo->Aperm2 = Aperm2; 6765 coo->Bimap2 = Bimap2; 6766 coo->Bjmap2 = Bjmap2; 6767 coo->Bperm2 = Bperm2; 6768 coo->Cperm1 = Cperm1; 6769 // Allocate in preallocation. 
If not used, it has zero cost on host 6770 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6771 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6772 PetscCall(PetscContainerSetPointer(container, coo)); 6773 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6774 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6775 PetscCall(PetscContainerDestroy(&container)); 6776 PetscFunctionReturn(PETSC_SUCCESS); 6777 } 6778 6779 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6780 { 6781 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6782 Mat A = mpiaij->A, B = mpiaij->B; 6783 PetscScalar *Aa, *Ba; 6784 PetscScalar *sendbuf, *recvbuf; 6785 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6786 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6787 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6788 const PetscCount *Cperm1; 6789 PetscContainer container; 6790 MatCOOStruct_MPIAIJ *coo; 6791 6792 PetscFunctionBegin; 6793 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6794 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6795 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6796 sendbuf = coo->sendbuf; 6797 recvbuf = coo->recvbuf; 6798 Ajmap1 = coo->Ajmap1; 6799 Ajmap2 = coo->Ajmap2; 6800 Aimap2 = coo->Aimap2; 6801 Bjmap1 = coo->Bjmap1; 6802 Bjmap2 = coo->Bjmap2; 6803 Bimap2 = coo->Bimap2; 6804 Aperm1 = coo->Aperm1; 6805 Aperm2 = coo->Aperm2; 6806 Bperm1 = coo->Bperm1; 6807 Bperm2 = coo->Bperm2; 6808 Cperm1 = coo->Cperm1; 6809 6810 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6811 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6812 6813 /* Pack entries to be sent to remote */ 6814 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6815 6816 /* Send remote entries to their owner and overlap the communication with local computation */ 6817 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6818 /* Add local entries to A and B */ 6819 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6820 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6821 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6822 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6823 } 6824 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6825 PetscScalar sum = 0.0; 6826 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6827 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6828 } 6829 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6830 6831 /* Add received remote entries to A and B */ 6832 for (PetscCount i = 0; i < coo->Annz2; i++) { 6833 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6834 } 6835 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6836 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6837 } 6838 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6839 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6840 PetscFunctionReturn(PETSC_SUCCESS); 6841 } 6842 6843 /*MC 6844 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6845 6846 Options Database Keys: 6847 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6848 6849 Level: beginner 6850 6851 Notes: 6852 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6853 in this case the values associated with the rows and columns one passes in are set to zero 6854 in the matrix 6855 6856 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6857 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6858 6859 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6860 M*/ 6861 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6862 { 6863 Mat_MPIAIJ *b; 6864 PetscMPIInt size; 6865 6866 PetscFunctionBegin; 6867 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6868 6869 PetscCall(PetscNew(&b)); 6870 B->data = (void *)b; 6871 B->ops[0] = MatOps_Values; 6872 B->assembled = PETSC_FALSE; 6873 B->insertmode = NOT_SET_VALUES; 6874 b->size = size; 6875 6876 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6877 6878 /* build cache for off array entries formed */ 6879 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6880 6881 b->donotstash = PETSC_FALSE; 6882 b->colmap = NULL; 6883 b->garray = NULL; 6884 b->roworiented = PETSC_TRUE; 6885 6886 /* stuff used for matrix vector multiply */ 6887 b->lvec = NULL; 6888 b->Mvctx = NULL; 6889 6890 /* stuff for MatGetRow() */ 6891 b->rowindices = NULL; 6892 b->rowvalues = NULL; 6893 b->getrowactive = PETSC_FALSE; 6894 6895 /* flexible pointer used in CUSPARSE classes */ 6896 b->spptr = NULL; 6897 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6908 #if defined(PETSC_HAVE_CUDA) 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6910 #endif 6911 #if defined(PETSC_HAVE_HIP) 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6913 #endif 6914 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6916 #endif 6917 #if defined(PETSC_HAVE_MKL_SPARSE) 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6919 #endif 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6924 #if defined(PETSC_HAVE_ELEMENTAL) 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6926 #endif 6927 #if defined(PETSC_HAVE_SCALAPACK) 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6929 #endif 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6932 #if defined(PETSC_HAVE_HYPRE) 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6935 #endif 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6940 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6941 PetscFunctionReturn(PETSC_SUCCESS); 6942 } 6943 6944 /*@ 6945 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6946 and "off-diagonal" part of the matrix in CSR format. 6947 6948 Collective 6949 6950 Input Parameters: 6951 + comm - MPI communicator 6952 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6953 . n - This value should be the same as the local size used in creating the 6954 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6955 calculated if `N` is given) For square matrices `n` is almost always `m`. 6956 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6957 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6958 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6959 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6960 . a - matrix values 6961 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6962 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6963 - oa - matrix values 6964 6965 Output Parameter: 6966 . mat - the matrix 6967 6968 Level: advanced 6969 6970 Notes: 6971 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6972 must free the arrays once the matrix has been destroyed and not before. 6973 6974 The `i` and `j` indices are 0 based 6975 6976 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6977 6978 This sets local rows and cannot be used to set off-processor values. 6979 6980 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6981 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6982 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6983 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6984 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6985 communication if it is known that only local entries will be set. 
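   Example Usage:
   A minimal sketch, assuming the caller has already filled the CSR arrays ai, aj, aa (diagonal part, local column indices) and
   oi, oj, oa (off-diagonal part, global column indices) as described above
.vb
   Mat A;

   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, m, n, PETSC_DETERMINE, PETSC_DETERMINE, ai, aj, aa, oi, oj, oa, &A));
   /* ... use A ... */
   PetscCall(MatDestroy(&A));
   /* ai, aj, aa, oi, oj, oa may be freed only now, after the matrix has been destroyed */
.ve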
6986 6987 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6988 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6989 @*/ 6990 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6991 { 6992 Mat_MPIAIJ *maij; 6993 6994 PetscFunctionBegin; 6995 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6996 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6997 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6998 PetscCall(MatCreate(comm, mat)); 6999 PetscCall(MatSetSizes(*mat, m, n, M, N)); 7000 PetscCall(MatSetType(*mat, MATMPIAIJ)); 7001 maij = (Mat_MPIAIJ *)(*mat)->data; 7002 7003 (*mat)->preallocated = PETSC_TRUE; 7004 7005 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7006 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7007 7008 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7009 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7010 7011 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7012 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7013 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7014 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7015 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7016 PetscFunctionReturn(PETSC_SUCCESS); 7017 } 7018 7019 typedef struct { 7020 Mat *mp; /* intermediate products */ 7021 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7022 PetscInt cp; /* number of intermediate products */ 7023 7024 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7025 PetscInt *startsj_s, *startsj_r; 7026 PetscScalar *bufa; 7027 Mat P_oth; 7028 7029 /* may take advantage of merging product->B */ 7030 Mat Bloc; /* B-local by merging diag and off-diag */ 7031 7032 /* cusparse does not have support to split between symbolic and numeric phases. 7033 When api_user is true, we don't need to update the numerical values 7034 of the temporary storage */ 7035 PetscBool reusesym; 7036 7037 /* support for COO values insertion */ 7038 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7039 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7040 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7041 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7042 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7043 PetscMemType mtype; 7044 7045 /* customization */ 7046 PetscBool abmerge; 7047 PetscBool P_oth_bind; 7048 } MatMatMPIAIJBACKEND; 7049 7050 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7051 { 7052 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7053 PetscInt i; 7054 7055 PetscFunctionBegin; 7056 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7057 PetscCall(PetscFree(mmdata->bufa)); 7058 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7059 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7060 PetscCall(MatDestroy(&mmdata->P_oth)); 7061 PetscCall(MatDestroy(&mmdata->Bloc)); 7062 PetscCall(PetscSFDestroy(&mmdata->sf)); 7063 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7064 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7065 PetscCall(PetscFree(mmdata->own[0])); 7066 PetscCall(PetscFree(mmdata->own)); 7067 PetscCall(PetscFree(mmdata->off[0])); 7068 PetscCall(PetscFree(mmdata->off)); 7069 PetscCall(PetscFree(mmdata)); 7070 PetscFunctionReturn(PETSC_SUCCESS); 7071 } 7072 7073 /* Copy selected n entries with indices in idx[] of A to v[]. 7074 If idx is NULL, copy the whole data array of A to v[] 7075 */ 7076 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7077 { 7078 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7079 7080 PetscFunctionBegin; 7081 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7082 if (f) { 7083 PetscCall((*f)(A, n, idx, v)); 7084 } else { 7085 const PetscScalar *vv; 7086 7087 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7088 if (n && idx) { 7089 PetscScalar *w = v; 7090 const PetscInt *oi = idx; 7091 PetscInt j; 7092 7093 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7094 } else { 7095 PetscCall(PetscArraycpy(v, vv, n)); 7096 } 7097 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7098 } 7099 PetscFunctionReturn(PETSC_SUCCESS); 7100 } 7101 7102 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7103 { 7104 MatMatMPIAIJBACKEND *mmdata; 7105 PetscInt i, n_d, n_o; 7106 7107 PetscFunctionBegin; 7108 MatCheckProduct(C, 1); 7109 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7110 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7111 if (!mmdata->reusesym) { /* update temporary matrices */ 7112 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7113 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7114 } 7115 mmdata->reusesym = PETSC_FALSE; 7116 7117 for (i = 0; i < mmdata->cp; i++) { 7118 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7119 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7120 } 7121 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7122 PetscInt noff; 7123 7124 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7125 if (mmdata->mptmp[i]) continue; 7126 if (noff) { 7127 PetscInt nown; 7128 7129 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7130 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7131 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7132 n_o += noff; 7133 n_d += nown; 7134 } else { 7135 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7136 7137 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7138 n_d += mm->nz; 7139 } 7140 } 7141 if (mmdata->hasoffproc) { /* offprocess insertion */ 7142 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7143 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7144 } 7145 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7146 PetscFunctionReturn(PETSC_SUCCESS); 7147 } 7148 7149 /* Support for Pt * A, A * P, or Pt * A * P */ 7150 #define MAX_NUMBER_INTERMEDIATE 4 7151 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7152 { 7153 Mat_Product *product = C->product; 7154 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7155 Mat_MPIAIJ *a, *p; 7156 MatMatMPIAIJBACKEND *mmdata; 7157 ISLocalToGlobalMapping P_oth_l2g = NULL; 7158 IS glob = NULL; 7159 const char *prefix; 7160 char pprefix[256]; 7161 const PetscInt *globidx, *P_oth_idx; 7162 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7163 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7164 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7165 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7166 /* a base offset; type-2: sparse with a local to global map table */ 7167 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7168 7169 MatProductType ptype; 7170 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7171 PetscMPIInt size; 7172 7173 PetscFunctionBegin; 7174 MatCheckProduct(C, 1); 7175 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7176 ptype = product->type; 7177 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7178 ptype = MATPRODUCT_AB; 7179 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7180 } 7181 switch (ptype) { 7182 case MATPRODUCT_AB: 7183 A = product->A; 7184 P = product->B; 7185 m = A->rmap->n; 7186 n = P->cmap->n; 7187 M = A->rmap->N; 7188 N = P->cmap->N; 7189 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7190 break; 7191 case MATPRODUCT_AtB: 7192 P = product->A; 7193 A = product->B; 7194 m = P->cmap->n; 7195 n = A->cmap->n; 7196 M = P->cmap->N; 7197 N = A->cmap->N; 7198 hasoffproc = PETSC_TRUE; 7199 break; 7200 case MATPRODUCT_PtAP: 7201 A = product->A; 7202 P = product->B; 7203 m = P->cmap->n; 7204 n = P->cmap->n; 7205 M = P->cmap->N; 7206 N = P->cmap->N; 7207 hasoffproc = PETSC_TRUE; 7208 break; 7209 default: 7210 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7211 } 7212 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7213 if (size == 1) hasoffproc = PETSC_FALSE; 7214 7215 /* defaults */ 7216 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7217 mp[i] = NULL; 7218 mptmp[i] = PETSC_FALSE; 7219 rmapt[i] = -1; 7220 cmapt[i] = -1; 7221 rmapa[i] = NULL; 7222 cmapa[i] = NULL; 7223 } 7224 7225 /* customization */ 7226 PetscCall(PetscNew(&mmdata)); 7227 mmdata->reusesym = product->api_user; 7228 if (ptype == 
MATPRODUCT_AB) { 7229 if (product->api_user) { 7230 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7231 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7232 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7233 PetscOptionsEnd(); 7234 } else { 7235 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7236 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7237 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7238 PetscOptionsEnd(); 7239 } 7240 } else if (ptype == MATPRODUCT_PtAP) { 7241 if (product->api_user) { 7242 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7243 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7244 PetscOptionsEnd(); 7245 } else { 7246 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7247 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7248 PetscOptionsEnd(); 7249 } 7250 } 7251 a = (Mat_MPIAIJ *)A->data; 7252 p = (Mat_MPIAIJ *)P->data; 7253 PetscCall(MatSetSizes(C, m, n, M, N)); 7254 PetscCall(PetscLayoutSetUp(C->rmap)); 7255 PetscCall(PetscLayoutSetUp(C->cmap)); 7256 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7257 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7258 7259 cp = 0; 7260 switch (ptype) { 7261 case MATPRODUCT_AB: /* A * P */ 7262 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7263 7264 /* A_diag * P_local (merged or not) */ 7265 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7266 /* P is product->B */ 7267 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7268 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7269 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7270 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7271 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7272 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7273 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7274 mp[cp]->product->api_user = product->api_user; 7275 PetscCall(MatProductSetFromOptions(mp[cp])); 7276 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7277 PetscCall(ISGetIndices(glob, &globidx)); 7278 rmapt[cp] = 1; 7279 cmapt[cp] = 2; 7280 cmapa[cp] = globidx; 7281 mptmp[cp] = PETSC_FALSE; 7282 cp++; 7283 } else { /* A_diag * P_diag and A_diag * P_off */ 7284 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7285 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7286 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7287 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7288 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7289 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7290 mp[cp]->product->api_user = product->api_user; 7291 PetscCall(MatProductSetFromOptions(mp[cp])); 7292 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7293 rmapt[cp] = 1; 7294 cmapt[cp] = 1; 7295 mptmp[cp] = PETSC_FALSE; 7296 cp++; 7297 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7298 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7299 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7300 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7301 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7302 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7303 mp[cp]->product->api_user = product->api_user; 7304 PetscCall(MatProductSetFromOptions(mp[cp])); 7305 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7306 rmapt[cp] = 1; 7307 cmapt[cp] = 2; 7308 cmapa[cp] = p->garray; 7309 mptmp[cp] = PETSC_FALSE; 7310 cp++; 7311 } 7312 7313 /* A_off * P_other */ 7314 if (mmdata->P_oth) { 7315 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7316 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7317 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7318 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7319 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7320 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7321 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7322 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7323 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7324 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7325 mp[cp]->product->api_user = product->api_user; 7326 PetscCall(MatProductSetFromOptions(mp[cp])); 7327 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7328 rmapt[cp] = 1; 7329 cmapt[cp] = 2; 7330 cmapa[cp] = P_oth_idx; 7331 mptmp[cp] = PETSC_FALSE; 7332 cp++; 7333 } 7334 break; 7335 7336 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7337 /* A is product->B */ 7338 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7339 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7340 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7341 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7342 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7343 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7344 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7345 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7346 mp[cp]->product->api_user = product->api_user; 7347 PetscCall(MatProductSetFromOptions(mp[cp])); 7348 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7349 PetscCall(ISGetIndices(glob, &globidx)); 7350 rmapt[cp] = 2; 7351 rmapa[cp] = globidx; 7352 cmapt[cp] = 2; 7353 cmapa[cp] = globidx; 7354 mptmp[cp] = PETSC_FALSE; 7355 cp++; 7356 } else { 7357 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
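/*
   Added clarifying sketch (commentary only, not executable): in this A != P branch the product
   C = P^T * A is assembled from two intermediate MATPRODUCT_AtB products. Writing the merged
   local rows of A (mmdata->Bloc) as A_loc = [A_d | A_o],

     first product  : P_d^T * A_loc  -> rows are the locally owned rows of C           (rmapt = 1)
     second product : P_o^T * A_loc  -> rows are indexed by p->garray, possibly owned
                                        by other processes                              (rmapt = 2)

   and both products use the merged global column numbering returned in glob (cmapt = 2).
*/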
7366 PetscCall(ISGetIndices(glob, &globidx)); 7367 rmapt[cp] = 1; 7368 cmapt[cp] = 2; 7369 cmapa[cp] = globidx; 7370 mptmp[cp] = PETSC_FALSE; 7371 cp++; 7372 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7373 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7374 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7375 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7376 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7377 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7378 mp[cp]->product->api_user = product->api_user; 7379 PetscCall(MatProductSetFromOptions(mp[cp])); 7380 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7381 rmapt[cp] = 2; 7382 rmapa[cp] = p->garray; 7383 cmapt[cp] = 2; 7384 cmapa[cp] = globidx; 7385 mptmp[cp] = PETSC_FALSE; 7386 cp++; 7387 } 7388 break; 7389 case MATPRODUCT_PtAP: 7390 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7391 /* P is product->B */ 7392 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7393 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7394 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7395 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7396 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7397 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7398 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7399 mp[cp]->product->api_user = product->api_user; 7400 PetscCall(MatProductSetFromOptions(mp[cp])); 7401 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7402 PetscCall(ISGetIndices(glob, &globidx)); 7403 rmapt[cp] = 2; 7404 rmapa[cp] = globidx; 7405 cmapt[cp] = 2; 7406 cmapa[cp] = globidx; 7407 mptmp[cp] = PETSC_FALSE; 7408 cp++; 7409 if (mmdata->P_oth) { 7410 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7411 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7412 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7413 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7414 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7415 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7416 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7417 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7418 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7419 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7420 mp[cp]->product->api_user = product->api_user; 7421 PetscCall(MatProductSetFromOptions(mp[cp])); 7422 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7423 mptmp[cp] = PETSC_TRUE; 7424 cp++; 7425 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7426 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7427 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7428 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7429 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7430 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7431 mp[cp]->product->api_user = product->api_user; 7432 PetscCall(MatProductSetFromOptions(mp[cp])); 7433 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7434 rmapt[cp] = 2; 7435 rmapa[cp] = globidx; 7436 cmapt[cp] = 2; 7437 cmapa[cp] = P_oth_idx; 7438 mptmp[cp] = PETSC_FALSE; 7439 cp++; 7440 } 7441 break; 7442 default: 7443 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7444 } 7445 /* sanity check */ 7446 if (size > 1) 7447 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7448 7449 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7450 for (i = 0; i < cp; i++) { 7451 mmdata->mp[i] = mp[i]; 7452 mmdata->mptmp[i] = mptmp[i]; 7453 } 7454 mmdata->cp = cp; 7455 C->product->data = mmdata; 7456 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7457 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7458 7459 /* memory type */ 7460 mmdata->mtype = PETSC_MEMTYPE_HOST; 7461 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7462 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7463 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7464 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7465 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7466 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7467 7468 /* prepare coo coordinates for values insertion */ 7469 7470 /* count total nonzeros of those intermediate seqaij Mats 7471 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7472 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7473 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7474 */ 7475 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7476 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7477 if (mptmp[cp]) continue; 7478 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7479 const PetscInt *rmap = rmapa[cp]; 7480 const PetscInt mr = mp[cp]->rmap->n; 7481 const PetscInt rs = C->rmap->rstart; 7482 const PetscInt re = C->rmap->rend; 7483 const PetscInt *ii = mm->i; 7484 for (i = 0; i < mr; i++) { 7485 const PetscInt gr = rmap[i]; 7486 const PetscInt nz = ii[i + 1] - ii[i]; 7487 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7488 else ncoo_oown += nz; /* this row is local */ 7489 } 7490 } else ncoo_d += mm->nz; 7491 } 7492 7493 /* 7494 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7495 7496 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7497 7498 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7499 7500 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7501 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7502 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7503 7504 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7505 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7506 */ 7507 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7508 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7509 7510 /* gather (i,j) of nonzeros inserted by remote procs */ 7511 if (hasoffproc) { 7512 PetscSF msf; 7513 PetscInt ncoo2, *coo_i2, *coo_j2; 7514 7515 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7516 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7517 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7518 7519 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7520 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7521 PetscInt *idxoff = mmdata->off[cp]; 7522 PetscInt *idxown = mmdata->own[cp]; 7523 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7524 const PetscInt *rmap = rmapa[cp]; 7525 const PetscInt *cmap = cmapa[cp]; 7526 const PetscInt *ii = mm->i; 7527 PetscInt *coi = coo_i + ncoo_o; 7528 PetscInt *coj = coo_j + ncoo_o; 7529 const PetscInt mr = mp[cp]->rmap->n; 7530 const PetscInt rs = C->rmap->rstart; 7531 const PetscInt re = C->rmap->rend; 7532 const PetscInt cs = C->cmap->rstart; 7533 for (i = 0; i < mr; i++) { 7534 const PetscInt *jj = mm->j + ii[i]; 7535 const PetscInt gr = rmap[i]; 7536 const PetscInt nz = ii[i + 1] - ii[i]; 7537 if (gr < rs || gr >= re) { /* this is an offproc row */ 7538 for (j = ii[i]; j < ii[i + 1]; j++) { 7539 *coi++ = gr; 7540 *idxoff++ = j; 7541 } 7542 if (!cmapt[cp]) { /* already global */ 7543 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7544 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7545 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7546 } else { /* offdiag */ 7547 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7548 } 7549 ncoo_o += nz; 7550 } else { /* this is a local row */ 7551 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7552 } 7553 } 7554 } 7555 mmdata->off[cp + 1] = idxoff; 7556 mmdata->own[cp + 1] = idxown; 7557 } 7558 7559 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7560 PetscInt incoo_o; 7561 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7562 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7563 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7564 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7565 ncoo = ncoo_d + ncoo_oown + ncoo2; 7566 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7567 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7568 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7569 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7570 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7571 PetscCall(PetscFree2(coo_i, coo_j)); 7572 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7573 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7574 coo_i = coo_i2; 7575 coo_j = coo_j2; 7576 } else { /* no offproc values insertion */ 7577 ncoo = ncoo_d; 7578 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7579 7580 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7581 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7582 PetscCall(PetscSFSetUp(mmdata->sf)); 7583 } 7584 
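/*
   Added illustration (sketch only): after the two branches above, the COO index buffers (and later
   mmdata->coo_v) are laid out as

     [ 0, ncoo_d )                    entries of intermediate matrices that insert only into locally owned rows
     [ ncoo_d, ncoo_d + ncoo_oown )   locally owned entries of matrices with a sparse (type-2) row map
     [ ncoo_d + ncoo_oown, ncoo )     entries received from other ranks through mmdata->sf (empty when !hasoffproc)

   which is why the PetscSFGather calls above, and the ones on coo_w in MatProductNumeric_MPIAIJBACKEND(),
   place the remote contribution at offset ncoo_d + ncoo_oown.
*/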
mmdata->hasoffproc = hasoffproc; 7585 7586 /* gather (i,j) of nonzeros inserted locally */ 7587 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7588 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7589 PetscInt *coi = coo_i + ncoo_d; 7590 PetscInt *coj = coo_j + ncoo_d; 7591 const PetscInt *jj = mm->j; 7592 const PetscInt *ii = mm->i; 7593 const PetscInt *cmap = cmapa[cp]; 7594 const PetscInt *rmap = rmapa[cp]; 7595 const PetscInt mr = mp[cp]->rmap->n; 7596 const PetscInt rs = C->rmap->rstart; 7597 const PetscInt re = C->rmap->rend; 7598 const PetscInt cs = C->cmap->rstart; 7599 7600 if (mptmp[cp]) continue; 7601 if (rmapt[cp] == 1) { /* consecutive rows */ 7602 /* fill coo_i */ 7603 for (i = 0; i < mr; i++) { 7604 const PetscInt gr = i + rs; 7605 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7606 } 7607 /* fill coo_j */ 7608 if (!cmapt[cp]) { /* type-0, already global */ 7609 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7610 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7611 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7612 } else { /* type-2, local to global for sparse columns */ 7613 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7614 } 7615 ncoo_d += mm->nz; 7616 } else if (rmapt[cp] == 2) { /* sparse rows */ 7617 for (i = 0; i < mr; i++) { 7618 const PetscInt *jj = mm->j + ii[i]; 7619 const PetscInt gr = rmap[i]; 7620 const PetscInt nz = ii[i + 1] - ii[i]; 7621 if (gr >= rs && gr < re) { /* local rows */ 7622 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7623 if (!cmapt[cp]) { /* type-0, already global */ 7624 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7625 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7626 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7627 } else { /* type-2, local to global for sparse columns */ 7628 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7629 } 7630 ncoo_d += nz; 7631 } 7632 } 7633 } 7634 } 7635 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7636 PetscCall(ISDestroy(&glob)); 7637 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7638 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7639 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7640 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7641 7642 /* preallocate with COO data */ 7643 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7644 PetscCall(PetscFree2(coo_i, coo_j)); 7645 PetscFunctionReturn(PETSC_SUCCESS); 7646 } 7647 7648 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7649 { 7650 Mat_Product *product = mat->product; 7651 #if defined(PETSC_HAVE_DEVICE) 7652 PetscBool match = PETSC_FALSE; 7653 PetscBool usecpu = PETSC_FALSE; 7654 #else 7655 PetscBool match = PETSC_TRUE; 7656 #endif 7657 7658 PetscFunctionBegin; 7659 MatCheckProduct(mat, 1); 7660 #if defined(PETSC_HAVE_DEVICE) 7661 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7662 if (match) { /* we can always fallback to the CPU if requested */ 7663 switch (product->type) { 7664 case MATPRODUCT_AB: 7665 if (product->api_user) { 7666 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7667 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7668 
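/* Example usage (illustrative note): with device matrices, the CPU fallback can be requested from the
   command line with -matmatmult_backend_cpu when the user called MatMatMult() directly, or with
   -mat_product_algorithm_backend_cpu when the MatProduct API is used (handled in the else branch below);
   setting either option makes usecpu true, so match is cleared below and MatProductSetFromOptions_MPIAIJ()
   provides the implementation instead. */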
PetscOptionsEnd(); 7669 } else { 7670 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7671 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7672 PetscOptionsEnd(); 7673 } 7674 break; 7675 case MATPRODUCT_AtB: 7676 if (product->api_user) { 7677 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7678 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7679 PetscOptionsEnd(); 7680 } else { 7681 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7682 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7683 PetscOptionsEnd(); 7684 } 7685 break; 7686 case MATPRODUCT_PtAP: 7687 if (product->api_user) { 7688 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7689 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7690 PetscOptionsEnd(); 7691 } else { 7692 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7693 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7694 PetscOptionsEnd(); 7695 } 7696 break; 7697 default: 7698 break; 7699 } 7700 match = (PetscBool)!usecpu; 7701 } 7702 #endif 7703 if (match) { 7704 switch (product->type) { 7705 case MATPRODUCT_AB: 7706 case MATPRODUCT_AtB: 7707 case MATPRODUCT_PtAP: 7708 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7709 break; 7710 default: 7711 break; 7712 } 7713 } 7714 /* fallback to MPIAIJ ops */ 7715 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7716 PetscFunctionReturn(PETSC_SUCCESS); 7717 } 7718 7719 /* 7720 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7721 7722 n - the number of block indices in cc[] 7723 cc - the block indices (must be large enough to contain the indices) 7724 */ 7725 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7726 { 7727 PetscInt cnt = -1, nidx, j; 7728 const PetscInt *idx; 7729 7730 PetscFunctionBegin; 7731 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7732 if (nidx) { 7733 cnt = 0; 7734 cc[cnt] = idx[0] / bs; 7735 for (j = 1; j < nidx; j++) { 7736 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7737 } 7738 } 7739 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7740 *n = cnt + 1; 7741 PetscFunctionReturn(PETSC_SUCCESS); 7742 } 7743 7744 /* 7745 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7746 7747 ncollapsed - the number of block indices 7748 collapsed - the block indices (must be large enough to contain the indices) 7749 */ 7750 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7751 { 7752 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7753 7754 PetscFunctionBegin; 7755 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7756 for (i = start + 1; i < start + bs; i++) { 7757 
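/* Worked example (added for clarity, bs = 2): if the two scalar rows of this block row have column
   indices {0,1,4} and {1,4,5}, MatCollapseRow() collapses them to block columns {0,2} and {0,2};
   PetscMergeIntArray() below then keeps their sorted union {0,2} in merged, and the cprev/merged
   pointers are swapped so the union accumulates over all bs rows of the block row. */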
PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7758 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7759 cprevtmp = cprev; 7760 cprev = merged; 7761 merged = cprevtmp; 7762 } 7763 *ncollapsed = nprev; 7764 if (collapsed) *collapsed = cprev; 7765 PetscFunctionReturn(PETSC_SUCCESS); 7766 } 7767 7768 /* 7769 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix; the filter, index_size, and index[] arguments control dropping of small graph entries and which entries within each block are used 7770 7771 Input Parameters: 7772 + Amat - matrix 7773 . symmetrize - make the result symmetric 7774 - scale - scale with diagonal 7775 7776 Output Parameter: 7777 . a_Gmat - output scalar graph with nonnegative entries 7778 7779 */ 7780 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7781 { 7782 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7783 MPI_Comm comm; 7784 Mat Gmat; 7785 PetscBool ismpiaij, isseqaij; 7786 Mat a, b, c; 7787 MatType jtype; 7788 7789 PetscFunctionBegin; 7790 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7791 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7792 PetscCall(MatGetSize(Amat, &MM, &NN)); 7793 PetscCall(MatGetBlockSize(Amat, &bs)); 7794 nloc = (Iend - Istart) / bs; 7795 7796 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7797 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7798 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7799 7800 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7801 /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, and let each class provide a fast 7802 implementation */ 7803 if (bs > 1) { 7804 PetscCall(MatGetType(Amat, &jtype)); 7805 PetscCall(MatCreate(comm, &Gmat)); 7806 PetscCall(MatSetType(Gmat, jtype)); 7807 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7808 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7809 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7810 PetscInt *d_nnz, *o_nnz; 7811 MatScalar *aa, val, *AA; 7812 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7813 7814 if (isseqaij) { 7815 a = Amat; 7816 b = NULL; 7817 } else { 7818 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7819 a = d->A; 7820 b = d->B; 7821 } 7822 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7823 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7824 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7825 PetscInt *nnz = (c == a) ?
d_nnz : o_nnz; 7826 const PetscInt *cols1, *cols2; 7827 7828 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7829 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7830 nnz[brow / bs] = nc2 / bs; 7831 if (nc2 % bs) ok = 0; 7832 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7833 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7834 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7835 if (nc1 != nc2) ok = 0; 7836 else { 7837 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7838 if (cols1[jj] != cols2[jj]) ok = 0; 7839 if (cols1[jj] % bs != jj % bs) ok = 0; 7840 } 7841 } 7842 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7843 } 7844 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7845 if (!ok) { 7846 PetscCall(PetscFree2(d_nnz, o_nnz)); 7847 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7848 goto old_bs; 7849 } 7850 } 7851 } 7852 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7853 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7854 PetscCall(PetscFree2(d_nnz, o_nnz)); 7855 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7856 // diag 7857 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7858 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7859 7860 ai = aseq->i; 7861 n = ai[brow + 1] - ai[brow]; 7862 aj = aseq->j + ai[brow]; 7863 for (PetscInt k = 0; k < n; k += bs) { // block columns 7864 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7865 val = 0; 7866 if (index_size == 0) { 7867 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7868 aa = aseq->a + ai[brow + ii] + k; 7869 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7870 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7871 } 7872 } 7873 } else { // use (index,index) value if provided 7874 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7875 PetscInt ii = index[iii]; 7876 aa = aseq->a + ai[brow + ii] + k; 7877 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7878 PetscInt jj = index[jjj]; 7879 val += PetscAbs(PetscRealPart(aa[jj])); 7880 } 7881 } 7882 } 7883 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7884 AA[k / bs] = val; 7885 } 7886 grow = Istart / bs + brow / bs; 7887 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7888 } 7889 // off-diag 7890 if (ismpiaij) { 7891 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7892 const PetscScalar *vals; 7893 const PetscInt *cols, *garray = aij->garray; 7894 7895 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7896 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7897 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7898 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7899 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7900 AA[k / bs] = 0; 7901 AJ[cidx] = garray[cols[k]] / bs; 7902 } 7903 nc = ncols / bs; 7904 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7905 if (index_size == 0) { 7906 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7907 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7908 for (PetscInt k = 0; k < ncols; k += bs) { 7909 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7910 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7911 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + 
jj])); 7912 } 7913 } 7914 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7915 } 7916 } else { // use (index,index) value if provided 7917 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7918 PetscInt ii = index[iii]; 7919 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7920 for (PetscInt k = 0; k < ncols; k += bs) { 7921 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7922 PetscInt jj = index[jjj]; 7923 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7924 } 7925 } 7926 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7927 } 7928 } 7929 grow = Istart / bs + brow / bs; 7930 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7931 } 7932 } 7933 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7934 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7935 PetscCall(PetscFree2(AA, AJ)); 7936 } else { 7937 const PetscScalar *vals; 7938 const PetscInt *idx; 7939 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7940 old_bs: 7941 /* 7942 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7943 */ 7944 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7945 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7946 if (isseqaij) { 7947 PetscInt max_d_nnz; 7948 7949 /* 7950 Determine exact preallocation count for (sequential) scalar matrix 7951 */ 7952 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7953 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7954 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7955 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7956 PetscCall(PetscFree3(w0, w1, w2)); 7957 } else if (ismpiaij) { 7958 Mat Daij, Oaij; 7959 const PetscInt *garray; 7960 PetscInt max_d_nnz; 7961 7962 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7963 /* 7964 Determine exact preallocation count for diagonal block portion of scalar matrix 7965 */ 7966 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7967 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7968 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7969 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7970 PetscCall(PetscFree3(w0, w1, w2)); 7971 /* 7972 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7973 */ 7974 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7975 o_nnz[jj] = 0; 7976 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7977 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7978 o_nnz[jj] += ncols; 7979 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7980 } 7981 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7982 } 7983 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7984 /* get scalar copy (norms) of matrix */ 7985 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7986 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7987 PetscCall(PetscFree2(d_nnz, o_nnz)); 7988 for (Ii = Istart; Ii < Iend; Ii++) { 7989 PetscInt dest_row = Ii / bs; 7990 7991 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7992 for (jj = 0; jj < ncols; jj++) { 7993 PetscInt dest_col = idx[jj] / bs; 7994 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7995 7996 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, 
ADD_VALUES)); 7997 } 7998 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7999 } 8000 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8001 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8002 } 8003 } else { 8004 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8005 else { 8006 Gmat = Amat; 8007 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8008 } 8009 if (isseqaij) { 8010 a = Gmat; 8011 b = NULL; 8012 } else { 8013 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8014 a = d->A; 8015 b = d->B; 8016 } 8017 if (filter >= 0 || scale) { 8018 /* take absolute value of each entry */ 8019 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8020 MatInfo info; 8021 PetscScalar *avals; 8022 8023 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8024 PetscCall(MatSeqAIJGetArray(c, &avals)); 8025 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8026 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8027 } 8028 } 8029 } 8030 if (symmetrize) { 8031 PetscBool isset, issym; 8032 8033 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8034 if (!isset || !issym) { 8035 Mat matTrans; 8036 8037 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8038 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8039 PetscCall(MatDestroy(&matTrans)); 8040 } 8041 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8042 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8043 if (scale) { 8044 /* scale c for all diagonal values = 1 or -1 */ 8045 Vec diag; 8046 8047 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8048 PetscCall(MatGetDiagonal(Gmat, diag)); 8049 PetscCall(VecReciprocal(diag)); 8050 PetscCall(VecSqrtAbs(diag)); 8051 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8052 PetscCall(VecDestroy(&diag)); 8053 } 8054 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8055 if (filter >= 0) { 8056 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8057 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8058 } 8059 *a_Gmat = Gmat; 8060 PetscFunctionReturn(PETSC_SUCCESS); 8061 } 8062 8063 /* 8064 Special version for direct calls from Fortran 8065 */ 8066 8067 /* Change these macros so can be used in void function */ 8068 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8069 #undef PetscCall 8070 #define PetscCall(...) \ 8071 do { \ 8072 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8073 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8074 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8075 return; \ 8076 } \ 8077 } while (0) 8078 8079 #undef SETERRQ 8080 #define SETERRQ(comm, ierr, ...) 
\ 8081 do { \ 8082 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8083 return; \ 8084 } while (0) 8085 8086 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8087 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8088 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8089 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8090 #else 8091 #endif 8092 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8093 { 8094 Mat mat = *mmat; 8095 PetscInt m = *mm, n = *mn; 8096 InsertMode addv = *maddv; 8097 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8098 PetscScalar value; 8099 8100 MatCheckPreallocated(mat, 1); 8101 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8102 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8103 { 8104 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8105 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8106 PetscBool roworiented = aij->roworiented; 8107 8108 /* Some Variables required in the macro */ 8109 Mat A = aij->A; 8110 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8111 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8112 MatScalar *aa; 8113 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8114 Mat B = aij->B; 8115 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8116 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8117 MatScalar *ba; 8118 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8119 * cannot use "#if defined" inside a macro. 
*/ 8120 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8121 8122 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8123 PetscInt nonew = a->nonew; 8124 MatScalar *ap1, *ap2; 8125 8126 PetscFunctionBegin; 8127 PetscCall(MatSeqAIJGetArray(A, &aa)); 8128 PetscCall(MatSeqAIJGetArray(B, &ba)); 8129 for (i = 0; i < m; i++) { 8130 if (im[i] < 0) continue; 8131 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8132 if (im[i] >= rstart && im[i] < rend) { 8133 row = im[i] - rstart; 8134 lastcol1 = -1; 8135 rp1 = aj + ai[row]; 8136 ap1 = aa + ai[row]; 8137 rmax1 = aimax[row]; 8138 nrow1 = ailen[row]; 8139 low1 = 0; 8140 high1 = nrow1; 8141 lastcol2 = -1; 8142 rp2 = bj + bi[row]; 8143 ap2 = ba + bi[row]; 8144 rmax2 = bimax[row]; 8145 nrow2 = bilen[row]; 8146 low2 = 0; 8147 high2 = nrow2; 8148 8149 for (j = 0; j < n; j++) { 8150 if (roworiented) value = v[i * n + j]; 8151 else value = v[i + j * m]; 8152 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8153 if (in[j] >= cstart && in[j] < cend) { 8154 col = in[j] - cstart; 8155 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8156 } else if (in[j] < 0) continue; 8157 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8158 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8159 } else { 8160 if (mat->was_assembled) { 8161 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8162 #if defined(PETSC_USE_CTABLE) 8163 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8164 col--; 8165 #else 8166 col = aij->colmap[in[j]] - 1; 8167 #endif 8168 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8169 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8170 col = in[j]; 8171 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8172 B = aij->B; 8173 b = (Mat_SeqAIJ *)B->data; 8174 bimax = b->imax; 8175 bi = b->i; 8176 bilen = b->ilen; 8177 bj = b->j; 8178 rp2 = bj + bi[row]; 8179 ap2 = ba + bi[row]; 8180 rmax2 = bimax[row]; 8181 nrow2 = bilen[row]; 8182 low2 = 0; 8183 high2 = nrow2; 8184 bm = aij->B->rmap->n; 8185 ba = b->a; 8186 inserted = PETSC_FALSE; 8187 } 8188 } else col = in[j]; 8189 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8190 } 8191 } 8192 } else if (!aij->donotstash) { 8193 if (roworiented) { 8194 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8195 } else { 8196 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8197 } 8198 } 8199 } 8200 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8201 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8202 } 8203 PetscFunctionReturnVoid(); 8204 } 8205 8206 /* Undefining these here since they were redefined from their original definition above! No 8207 * other PETSc functions should be defined past this point, as it is impossible to recover the 8208 * original definitions */ 8209 #undef PetscCall 8210 #undef SETERRQ 8211
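/*
   Illustrative usage sketch for MatCreateMPIAIJWithSplitArrays() defined earlier in this file
   (added commentary, not part of the library; assumes 2 MPI ranks, each owning 2 rows and 2 columns
   of a 4x4 matrix, and shows only the arrays passed by rank 0):

     PetscInt    i[]  = {0, 2, 3};    row offsets of the diagonal block in CSR form (must start at 0)
     PetscInt    j[]  = {0, 1, 1};    local column indices of the diagonal block
     PetscScalar a[]  = {1., 2., 3.};
     PetscInt    oi[] = {0, 1, 1};    row offsets of the off-diagonal block (must start at 0)
     PetscInt    oj[] = {2};          global column indices of the off-diagonal block
     PetscScalar oa[] = {4.};
     Mat         A;

     PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));

   Because the routine wraps these arrays with MatCreateSeqAIJWithArrays() rather than copying them,
   they must stay valid for the lifetime of A.
*/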