#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`; the type also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/
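/*
   Illustrative usage sketch (not part of this file's implementation; the sizes and nonzero
   counts below are made-up example values): create a single matrix of type MATAIJ and call
   both preallocation routines, as recommended above, so the same code runs unchanged on one
   process or on many.

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
     PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
     PetscCall(MatSetType(A, MATAIJ));
     PetscCall(MatSetFromOptions(A));
     PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));          // used on a single-process communicator
     PetscCall(MatMPIAIJSetPreallocation(A, 3, NULL, 2, NULL)); // used on a multi-process communicator
*/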
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  process stores an order-N integer array), but access is fast.
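
  As a purely illustrative example (not code used below): if the off-diagonal block B on this
  rank has garray = {3, 17, 42}, i.e. its local columns 0, 1, 2 hold global columns 3, 17 and 42,
  then the colmap built here answers "global column 17 -> local column 1". The stored value (and,
  in the hash-table case, the key) is shifted by one so that a lookup result of 0 can be used to
  mean "this global column is not present on this rank".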
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ  *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar  value = 0.0;
  PetscInt     i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool    roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
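
  Purely illustrative example: if this rank owns global columns [cstart, cend) = [10, 20) and a
  local row of the input CSR has global columns {3, 11, 15, 27}, the routine below places {1, 5}
  (shifted by cstart) into the diagonal block's j array, places {3, 27} (kept global) into the
  off-diagonal block's j array, and records 2 entries in each block's ilen for that row.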
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all nonzero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all nonzero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;
  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
mat->cmap->N : 0, 0, 1, &iscol)); 1333 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1334 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1335 /* The commented code uses MatCreateSubMatrices instead */ 1336 /* 1337 Mat *AA, A = NULL, Av; 1338 IS isrow,iscol; 1339 1340 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1341 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1342 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1343 if (rank == 0) { 1344 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1345 A = AA[0]; 1346 Av = AA[0]; 1347 } 1348 PetscCall(MatDestroySubMatrices(1,&AA)); 1349 */ 1350 PetscCall(ISDestroy(&iscol)); 1351 PetscCall(ISDestroy(&isrow)); 1352 /* 1353 Everyone has to call to draw the matrix since the graphics waits are 1354 synchronized across all processors that share the PetscDraw object 1355 */ 1356 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1357 if (rank == 0) { 1358 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1359 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1360 } 1361 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1362 PetscCall(MatDestroy(&A)); 1363 } 1364 PetscFunctionReturn(PETSC_SUCCESS); 1365 } 1366 1367 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1368 { 1369 PetscBool iascii, isdraw, issocket, isbinary; 1370 1371 PetscFunctionBegin; 1372 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1373 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1374 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1376 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1377 PetscFunctionReturn(PETSC_SUCCESS); 1378 } 1379 1380 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1381 { 1382 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1383 Vec bb1 = NULL; 1384 PetscBool hasop; 1385 1386 PetscFunctionBegin; 1387 if (flag == SOR_APPLY_UPPER) { 1388 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1389 PetscFunctionReturn(PETSC_SUCCESS); 1390 } 1391 1392 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1393 1394 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1395 if (flag & SOR_ZERO_INITIAL_GUESS) { 1396 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1397 its--; 1398 } 1399 1400 while (its--) { 1401 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1402 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1403 1404 /* update rhs: bb1 = bb - B*x */ 1405 PetscCall(VecScale(mat->lvec, -1.0)); 1406 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1407 1408 /* local sweep */ 1409 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1410 } 1411 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1412 if (flag & SOR_ZERO_INITIAL_GUESS) { 1413 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1414 its--; 1415 } 1416 while (its--) { 1417 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1418 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1419 1420 /* update rhs: bb1 = bb - B*x */ 1421 PetscCall(VecScale(mat->lvec, -1.0)); 1422 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1423 1424 /* local sweep */ 1425 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1426 } 1427 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1428 if (flag & SOR_ZERO_INITIAL_GUESS) { 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1430 its--; 1431 } 1432 while (its--) { 1433 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1434 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1435 1436 /* update rhs: bb1 = bb - B*x */ 1437 PetscCall(VecScale(mat->lvec, -1.0)); 1438 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1439 1440 /* local sweep */ 1441 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1442 } 1443 } else if (flag & SOR_EISENSTAT) { 1444 Vec xx1; 1445 1446 PetscCall(VecDuplicate(bb, &xx1)); 1447 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1448 1449 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1450 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1451 if (!mat->diag) { 1452 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1453 PetscCall(MatGetDiagonal(matin, mat->diag)); 1454 } 1455 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1456 if (hasop) { 1457 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1458 } else { 1459 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1460 } 1461 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1462 1463 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1464 1465 /* local sweep */ 1466 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1467 PetscCall(VecAXPY(xx, 1.0, xx1)); 1468 PetscCall(VecDestroy(&xx1)); 1469 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1470 1471 PetscCall(VecDestroy(&bb1)); 1472 1473 matin->factorerrortype = mat->A->factorerrortype; 1474 PetscFunctionReturn(PETSC_SUCCESS); 1475 } 1476 1477 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1478 { 1479 Mat aA, aB, Aperm; 1480 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1481 PetscScalar *aa, *ba; 1482 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1483 PetscSF rowsf, sf; 1484 IS parcolp = NULL; 1485 PetscBool done; 1486 1487 PetscFunctionBegin; 1488 PetscCall(MatGetLocalSize(A, &m, &n)); 1489 PetscCall(ISGetIndices(rowp, &rwant)); 1490 PetscCall(ISGetIndices(colp, &cwant)); 1491 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1492 1493 /* Invert row permutation to find out where my rows should go */ 1494 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1495 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1496 PetscCall(PetscSFSetFromOptions(rowsf)); 1497 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1498 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1499 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1500 1501 /* Invert column permutation to find out where my columns should go */ 1502 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1503 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1504 PetscCall(PetscSFSetFromOptions(sf)); 1505 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1506 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1507 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1508 PetscCall(PetscSFDestroy(&sf)); 1509 1510 PetscCall(ISRestoreIndices(rowp, &rwant)); 1511 PetscCall(ISRestoreIndices(colp, &cwant)); 1512 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1513 1514 /* Find out where my gcols should go */ 1515 PetscCall(MatGetSize(aB, NULL, &ng)); 1516 PetscCall(PetscMalloc1(ng, &gcdest)); 1517 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1518 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1519 PetscCall(PetscSFSetFromOptions(sf)); 1520 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1521 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1522 PetscCall(PetscSFDestroy(&sf)); 1523 1524 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1525 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1526 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1527 for (i = 0; i < m; i++) { 1528 PetscInt row = rdest[i]; 1529 PetscMPIInt rowner; 1530 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1531 for (j = ai[i]; j < ai[i + 1]; j++) { 1532 PetscInt col = cdest[aj[j]]; 1533 PetscMPIInt cowner; 1534 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1535 if (rowner == cowner) dnnz[i]++; 1536 else onnz[i]++; 1537 } 1538 for (j = bi[i]; j < bi[i + 1]; j++) { 1539 PetscInt col = gcdest[bj[j]]; 1540 PetscMPIInt cowner; 1541 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1542 if (rowner == cowner) dnnz[i]++; 1543 else onnz[i]++; 1544 } 1545 } 1546 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1547 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1548 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1549 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1550 PetscCall(PetscSFDestroy(&rowsf)); 1551 1552 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1553 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1554 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1555 for (i = 0; i < m; i++) { 1556 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1557 PetscInt j0, rowlen; 1558 rowlen = ai[i + 1] - ai[i]; 1559 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1560 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1561 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1562 } 1563 rowlen = bi[i + 1] - bi[i]; 1564 for (j0 = j = 0; j < rowlen; j0 = j) { 1565 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1566 
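/* bcols (length m, repurposing the old onnz array) now holds the permuted global
   column indices for this batch; the matching values start at ba + bi[i] + j0, so the
   MatSetValues() call below inserts at most m entries of the row at a time */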
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1567 } 1568 } 1569 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1570 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1572 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1573 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1574 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1575 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1576 PetscCall(PetscFree3(work, rdest, cdest)); 1577 PetscCall(PetscFree(gcdest)); 1578 if (parcolp) PetscCall(ISDestroy(&colp)); 1579 *B = Aperm; 1580 PetscFunctionReturn(PETSC_SUCCESS); 1581 } 1582 1583 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1584 { 1585 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1586 1587 PetscFunctionBegin; 1588 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1589 if (ghosts) *ghosts = aij->garray; 1590 PetscFunctionReturn(PETSC_SUCCESS); 1591 } 1592 1593 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1594 { 1595 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1596 Mat A = mat->A, B = mat->B; 1597 PetscLogDouble isend[5], irecv[5]; 1598 1599 PetscFunctionBegin; 1600 info->block_size = 1.0; 1601 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1602 1603 isend[0] = info->nz_used; 1604 isend[1] = info->nz_allocated; 1605 isend[2] = info->nz_unneeded; 1606 isend[3] = info->memory; 1607 isend[4] = info->mallocs; 1608 1609 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1610 1611 isend[0] += info->nz_used; 1612 isend[1] += info->nz_allocated; 1613 isend[2] += info->nz_unneeded; 1614 isend[3] += info->memory; 1615 isend[4] += info->mallocs; 1616 if (flag == MAT_LOCAL) { 1617 info->nz_used = isend[0]; 1618 info->nz_allocated = isend[1]; 1619 info->nz_unneeded = isend[2]; 1620 info->memory = isend[3]; 1621 info->mallocs = isend[4]; 1622 } else if (flag == MAT_GLOBAL_MAX) { 1623 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1624 1625 info->nz_used = irecv[0]; 1626 info->nz_allocated = irecv[1]; 1627 info->nz_unneeded = irecv[2]; 1628 info->memory = irecv[3]; 1629 info->mallocs = irecv[4]; 1630 } else if (flag == MAT_GLOBAL_SUM) { 1631 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1632 1633 info->nz_used = irecv[0]; 1634 info->nz_allocated = irecv[1]; 1635 info->nz_unneeded = irecv[2]; 1636 info->memory = irecv[3]; 1637 info->mallocs = irecv[4]; 1638 } 1639 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1640 info->fill_ratio_needed = 0; 1641 info->factor_mallocs = 0; 1642 PetscFunctionReturn(PETSC_SUCCESS); 1643 } 1644 1645 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1646 { 1647 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1648 1649 PetscFunctionBegin; 1650 switch (op) { 1651 case MAT_NEW_NONZERO_LOCATIONS: 1652 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1653 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1654 case MAT_KEEP_NONZERO_PATTERN: 1655 case MAT_NEW_NONZERO_LOCATION_ERR: 1656 case MAT_USE_INODES: 1657 case MAT_IGNORE_ZERO_ENTRIES: 1658 case MAT_FORM_EXPLICIT_TRANSPOSE: 1659 MatCheckPreallocated(A, 1); 1660 PetscCall(MatSetOption(a->A, op, flg)); 1661 PetscCall(MatSetOption(a->B, op, flg)); 1662 break; 1663 case MAT_ROW_ORIENTED: 1664 MatCheckPreallocated(A, 1); 1665 a->roworiented = flg; 1666 1667 
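/* Forward the orientation flag to both sequential blocks so that value insertion into
   the diagonal (A) and off-diagonal (B) parts interprets the values array the same way
   as the parallel matrix. A caller assembling with column-oriented input would
   typically do (illustrative sketch only, not part of this routine):
     PetscCall(MatSetOption(mat, MAT_ROW_ORIENTED, PETSC_FALSE));  // values given column by column
     PetscCall(MatSetValues(mat, m, rows, n, cols, vals, INSERT_VALUES));
*/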
PetscCall(MatSetOption(a->A, op, flg)); 1668 PetscCall(MatSetOption(a->B, op, flg)); 1669 break; 1670 case MAT_FORCE_DIAGONAL_ENTRIES: 1671 case MAT_SORTED_FULL: 1672 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1673 break; 1674 case MAT_IGNORE_OFF_PROC_ENTRIES: 1675 a->donotstash = flg; 1676 break; 1677 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1678 case MAT_SPD: 1679 case MAT_SYMMETRIC: 1680 case MAT_STRUCTURALLY_SYMMETRIC: 1681 case MAT_HERMITIAN: 1682 case MAT_SYMMETRY_ETERNAL: 1683 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1684 case MAT_SPD_ETERNAL: 1685 /* if the diagonal matrix is square it inherits some of the properties above */ 1686 break; 1687 case MAT_SUBMAT_SINGLEIS: 1688 A->submat_singleis = flg; 1689 break; 1690 case MAT_STRUCTURE_ONLY: 1691 /* The option is handled directly by MatSetOption() */ 1692 break; 1693 default: 1694 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1695 } 1696 PetscFunctionReturn(PETSC_SUCCESS); 1697 } 1698 1699 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1700 { 1701 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1702 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1703 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1704 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1705 PetscInt *cmap, *idx_p; 1706 1707 PetscFunctionBegin; 1708 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1709 mat->getrowactive = PETSC_TRUE; 1710 1711 if (!mat->rowvalues && (idx || v)) { 1712 /* 1713 allocate enough space to hold information from the longest row. 1714 */ 1715 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1716 PetscInt max = 1, tmp; 1717 for (i = 0; i < matin->rmap->n; i++) { 1718 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1719 if (max < tmp) max = tmp; 1720 } 1721 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1722 } 1723 1724 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1725 lrow = row - rstart; 1726 1727 pvA = &vworkA; 1728 pcA = &cworkA; 1729 pvB = &vworkB; 1730 pcB = &cworkB; 1731 if (!v) { 1732 pvA = NULL; 1733 pvB = NULL; 1734 } 1735 if (!idx) { 1736 pcA = NULL; 1737 if (!v) pcB = NULL; 1738 } 1739 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1740 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1741 nztot = nzA + nzB; 1742 1743 cmap = mat->garray; 1744 if (v || idx) { 1745 if (nztot) { 1746 /* Sort by increasing column numbers, assuming A and B already sorted */ 1747 PetscInt imark = -1; 1748 if (v) { 1749 *v = v_p = mat->rowvalues; 1750 for (i = 0; i < nzB; i++) { 1751 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1752 else break; 1753 } 1754 imark = i; 1755 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1756 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1757 } 1758 if (idx) { 1759 *idx = idx_p = mat->rowindices; 1760 if (imark > -1) { 1761 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1762 } else { 1763 for (i = 0; i < nzB; i++) { 1764 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1765 else break; 1766 } 1767 imark = i; 1768 } 1769 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1770 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1771 } 1772 } else { 1773 if 
(idx) *idx = NULL; 1774 if (v) *v = NULL; 1775 } 1776 } 1777 *nz = nztot; 1778 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1779 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1780 PetscFunctionReturn(PETSC_SUCCESS); 1781 } 1782 1783 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1784 { 1785 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1786 1787 PetscFunctionBegin; 1788 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1789 aij->getrowactive = PETSC_FALSE; 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1794 { 1795 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1796 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1797 PetscInt i, j, cstart = mat->cmap->rstart; 1798 PetscReal sum = 0.0; 1799 const MatScalar *v, *amata, *bmata; 1800 PetscMPIInt iN; 1801 1802 PetscFunctionBegin; 1803 if (aij->size == 1) { 1804 PetscCall(MatNorm(aij->A, type, norm)); 1805 } else { 1806 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1807 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1808 if (type == NORM_FROBENIUS) { 1809 v = amata; 1810 for (i = 0; i < amat->nz; i++) { 1811 sum += PetscRealPart(PetscConj(*v) * (*v)); 1812 v++; 1813 } 1814 v = bmata; 1815 for (i = 0; i < bmat->nz; i++) { 1816 sum += PetscRealPart(PetscConj(*v) * (*v)); 1817 v++; 1818 } 1819 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1820 *norm = PetscSqrtReal(*norm); 1821 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1822 } else if (type == NORM_1) { /* max column norm */ 1823 PetscReal *tmp, *tmp2; 1824 PetscInt *jj, *garray = aij->garray; 1825 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1826 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1827 *norm = 0.0; 1828 v = amata; 1829 jj = amat->j; 1830 for (j = 0; j < amat->nz; j++) { 1831 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1832 v++; 1833 } 1834 v = bmata; 1835 jj = bmat->j; 1836 for (j = 0; j < bmat->nz; j++) { 1837 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1838 v++; 1839 } 1840 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1841 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1842 for (j = 0; j < mat->cmap->N; j++) { 1843 if (tmp2[j] > *norm) *norm = tmp2[j]; 1844 } 1845 PetscCall(PetscFree(tmp)); 1846 PetscCall(PetscFree(tmp2)); 1847 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1848 } else if (type == NORM_INFINITY) { /* max row norm */ 1849 PetscReal ntemp = 0.0; 1850 for (j = 0; j < aij->A->rmap->n; j++) { 1851 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1852 sum = 0.0; 1853 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1854 sum += PetscAbsScalar(*v); 1855 v++; 1856 } 1857 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1858 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1859 sum += PetscAbsScalar(*v); 1860 v++; 1861 } 1862 if (sum > ntemp) ntemp = sum; 1863 } 1864 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1865 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1866 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1867 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1868 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, 
&bmata)); 1869 } 1870 PetscFunctionReturn(PETSC_SUCCESS); 1871 } 1872 1873 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1874 { 1875 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1876 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1877 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1878 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1879 Mat B, A_diag, *B_diag; 1880 const MatScalar *pbv, *bv; 1881 1882 PetscFunctionBegin; 1883 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1884 ma = A->rmap->n; 1885 na = A->cmap->n; 1886 mb = a->B->rmap->n; 1887 nb = a->B->cmap->n; 1888 ai = Aloc->i; 1889 aj = Aloc->j; 1890 bi = Bloc->i; 1891 bj = Bloc->j; 1892 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1893 PetscInt *d_nnz, *g_nnz, *o_nnz; 1894 PetscSFNode *oloc; 1895 PETSC_UNUSED PetscSF sf; 1896 1897 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1898 /* compute d_nnz for preallocation */ 1899 PetscCall(PetscArrayzero(d_nnz, na)); 1900 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1901 /* compute local off-diagonal contributions */ 1902 PetscCall(PetscArrayzero(g_nnz, nb)); 1903 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1904 /* map those to global */ 1905 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1906 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1907 PetscCall(PetscSFSetFromOptions(sf)); 1908 PetscCall(PetscArrayzero(o_nnz, na)); 1909 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1910 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1911 PetscCall(PetscSFDestroy(&sf)); 1912 1913 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1914 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1915 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1916 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1917 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1918 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1919 } else { 1920 B = *matout; 1921 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1922 } 1923 1924 b = (Mat_MPIAIJ *)B->data; 1925 A_diag = a->A; 1926 B_diag = &b->A; 1927 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1928 A_diag_ncol = A_diag->cmap->N; 1929 B_diag_ilen = sub_B_diag->ilen; 1930 B_diag_i = sub_B_diag->i; 1931 1932 /* Set ilen for diagonal of B */ 1933 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1934 1935 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1936 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1937 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1938 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1939 1940 /* copy over the B part */ 1941 PetscCall(PetscMalloc1(bi[mb], &cols)); 1942 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1943 pbv = bv; 1944 row = A->rmap->rstart; 1945 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1946 cols_tmp = cols; 1947 for (i = 0; i < mb; i++) { 1948 ncol = bi[i + 1] - bi[i]; 1949 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1950 row++; 1951 if (pbv) pbv += ncol; 1952 if (cols_tmp) cols_tmp += ncol; 1953 } 1954 PetscCall(PetscFree(cols)); 1955 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1956 1957 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1958 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1959 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1960 *matout = B; 1961 } else { 1962 PetscCall(MatHeaderMerge(A, &B)); 1963 } 1964 PetscFunctionReturn(PETSC_SUCCESS); 1965 } 1966 1967 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1968 { 1969 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1970 Mat a = aij->A, b = aij->B; 1971 PetscInt s1, s2, s3; 1972 1973 PetscFunctionBegin; 1974 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1975 if (rr) { 1976 PetscCall(VecGetLocalSize(rr, &s1)); 1977 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1978 /* Overlap communication with computation. */ 1979 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1980 } 1981 if (ll) { 1982 PetscCall(VecGetLocalSize(ll, &s1)); 1983 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1984 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1985 } 1986 /* scale the diagonal block */ 1987 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1988 1989 if (rr) { 1990 /* Do a scatter end and then right scale the off-diagonal block */ 1991 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1992 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1993 } 1994 PetscFunctionReturn(PETSC_SUCCESS); 1995 } 1996 1997 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1998 { 1999 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2000 2001 PetscFunctionBegin; 2002 PetscCall(MatSetUnfactored(a->A)); 2003 PetscFunctionReturn(PETSC_SUCCESS); 2004 } 2005 2006 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2007 { 2008 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2009 Mat a, b, c, d; 2010 PetscBool flg; 2011 2012 PetscFunctionBegin; 2013 a = matA->A; 2014 b = matA->B; 2015 c = matB->A; 2016 d = matB->B; 2017 2018 PetscCall(MatEqual(a, c, &flg)); 2019 if (flg) PetscCall(MatEqual(b, d, &flg)); 2020 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2021 PetscFunctionReturn(PETSC_SUCCESS); 2022 } 2023 2024 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2025 { 2026 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2027 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2028 2029 PetscFunctionBegin; 2030 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2031 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2032 /* because of the column compression in the off-processor part of the matrix a->B, 2033 the number of columns in a->B and b->B may be different, hence we cannot call 2034 the MatCopy() directly on the two parts. If need be, we can provide a more 2035 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2036 then copying the submatrices */ 2037 PetscCall(MatCopy_Basic(A, B, str)); 2038 } else { 2039 PetscCall(MatCopy(a->A, b->A, str)); 2040 PetscCall(MatCopy(a->B, b->B, str)); 2041 } 2042 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 /* 2047 Computes the number of nonzeros per row needed for preallocation when X and Y 2048 have different nonzero structure. 2049 */ 2050 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2051 { 2052 PetscInt i, j, k, nzx, nzy; 2053 2054 PetscFunctionBegin; 2055 /* Set the number of nonzeros in the new matrix */ 2056 for (i = 0; i < m; i++) { 2057 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2058 nzx = xi[i + 1] - xi[i]; 2059 nzy = yi[i + 1] - yi[i]; 2060 nnz[i] = 0; 2061 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2062 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2063 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2064 nnz[i]++; 2065 } 2066 for (; k < nzy; k++) nnz[i]++; 2067 } 2068 PetscFunctionReturn(PETSC_SUCCESS); 2069 } 2070 2071 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2072 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2073 { 2074 PetscInt m = Y->rmap->N; 2075 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2076 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2077 2078 PetscFunctionBegin; 2079 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2084 { 2085 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2086 2087 PetscFunctionBegin; 2088 if (str == SAME_NONZERO_PATTERN) { 2089 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2090 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2091 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2092 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2093 } else { 2094 Mat B; 2095 PetscInt *nnz_d, *nnz_o; 2096 2097 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2098 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2099 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2100 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2101 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2102 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2103 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2104 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2105 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2106 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2107 PetscCall(MatHeaderMerge(Y, &B)); 2108 PetscCall(PetscFree(nnz_d)); 
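/* This branch handles the general case (typically DIFFERENT_NONZERO_PATTERN): a new
   matrix B was preallocated from the union of the two nonzero patterns, filled by
   MatAXPY_BasicWithPreallocation(), and swapped into Y by MatHeaderMerge(); only the
   scratch count arrays remain to be freed. From user code the whole operation is
   simply (illustrative sketch):
     PetscCall(MatAXPY(Y, alpha, X, DIFFERENT_NONZERO_PATTERN));  // Y += alpha*X
*/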
2109 PetscCall(PetscFree(nnz_o)); 2110 } 2111 PetscFunctionReturn(PETSC_SUCCESS); 2112 } 2113 2114 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2115 2116 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2117 { 2118 PetscFunctionBegin; 2119 if (PetscDefined(USE_COMPLEX)) { 2120 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2121 2122 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2123 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2124 } 2125 PetscFunctionReturn(PETSC_SUCCESS); 2126 } 2127 2128 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2129 { 2130 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2131 2132 PetscFunctionBegin; 2133 PetscCall(MatRealPart(a->A)); 2134 PetscCall(MatRealPart(a->B)); 2135 PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatImaginaryPart(a->A)); 2144 PetscCall(MatImaginaryPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 PetscInt i, *idxb = NULL, m = A->rmap->n; 2152 PetscScalar *va, *vv; 2153 Vec vB, vA; 2154 const PetscScalar *vb; 2155 2156 PetscFunctionBegin; 2157 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2158 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2159 2160 PetscCall(VecGetArrayWrite(vA, &va)); 2161 if (idx) { 2162 for (i = 0; i < m; i++) { 2163 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2164 } 2165 } 2166 2167 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2168 PetscCall(PetscMalloc1(m, &idxb)); 2169 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2170 2171 PetscCall(VecGetArrayWrite(v, &vv)); 2172 PetscCall(VecGetArrayRead(vB, &vb)); 2173 for (i = 0; i < m; i++) { 2174 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2175 vv[i] = vb[i]; 2176 if (idx) idx[i] = a->garray[idxb[i]]; 2177 } else { 2178 vv[i] = va[i]; 2179 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2180 } 2181 } 2182 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2183 PetscCall(VecRestoreArrayWrite(vA, &va)); 2184 PetscCall(VecRestoreArrayRead(vB, &vb)); 2185 PetscCall(PetscFree(idxb)); 2186 PetscCall(VecDestroy(&vA)); 2187 PetscCall(VecDestroy(&vB)); 2188 PetscFunctionReturn(PETSC_SUCCESS); 2189 } 2190 2191 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2192 { 2193 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2194 Vec vB, vA; 2195 2196 PetscFunctionBegin; 2197 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2198 PetscCall(MatGetRowSumAbs(a->A, vA)); 2199 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2200 PetscCall(MatGetRowSumAbs(a->B, vB)); 2201 PetscCall(VecAXPY(vA, 1.0, vB)); 2202 PetscCall(VecDestroy(&vB)); 2203 PetscCall(VecCopy(vA, v)); 2204 PetscCall(VecDestroy(&vA)); 2205 PetscFunctionReturn(PETSC_SUCCESS); 2206 } 2207 2208 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2209 { 2210 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2211 PetscInt m = A->rmap->n, n = A->cmap->n; 2212 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2213 PetscInt *cmap = mat->garray; 2214 PetscInt *diagIdx, *offdiagIdx; 2215 Vec diagV, offdiagV; 2216 PetscScalar *a, *diagA, *offdiagA; 2217 const PetscScalar *ba, *bav; 2218 PetscInt r, j, col, ncols, *bi, *bj; 2219 Mat B = mat->B; 2220 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2221 2222 PetscFunctionBegin; 2223 /* When a process holds entire A and other 
processes have no entry */ 2224 if (A->cmap->N == n) { 2225 PetscCall(VecGetArrayWrite(v, &diagA)); 2226 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2227 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2228 PetscCall(VecDestroy(&diagV)); 2229 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2230 PetscFunctionReturn(PETSC_SUCCESS); 2231 } else if (n == 0) { 2232 if (m) { 2233 PetscCall(VecGetArrayWrite(v, &a)); 2234 for (r = 0; r < m; r++) { 2235 a[r] = 0.0; 2236 if (idx) idx[r] = -1; 2237 } 2238 PetscCall(VecRestoreArrayWrite(v, &a)); 2239 } 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } 2242 2243 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2244 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2245 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2246 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2247 2248 /* Get offdiagIdx[] for implicit 0.0 */ 2249 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2250 ba = bav; 2251 bi = b->i; 2252 bj = b->j; 2253 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2254 for (r = 0; r < m; r++) { 2255 ncols = bi[r + 1] - bi[r]; 2256 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2257 offdiagA[r] = *ba; 2258 offdiagIdx[r] = cmap[0]; 2259 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2260 offdiagA[r] = 0.0; 2261 2262 /* Find first hole in the cmap */ 2263 for (j = 0; j < ncols; j++) { 2264 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2265 if (col > j && j < cstart) { 2266 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2267 break; 2268 } else if (col > j + n && j >= cstart) { 2269 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2270 break; 2271 } 2272 } 2273 if (j == ncols && ncols < A->cmap->N - n) { 2274 /* a hole is outside compressed Bcols */ 2275 if (ncols == 0) { 2276 if (cstart) { 2277 offdiagIdx[r] = 0; 2278 } else offdiagIdx[r] = cend; 2279 } else { /* ncols > 0 */ 2280 offdiagIdx[r] = cmap[ncols - 1] + 1; 2281 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2282 } 2283 } 2284 } 2285 2286 for (j = 0; j < ncols; j++) { 2287 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2288 offdiagA[r] = *ba; 2289 offdiagIdx[r] = cmap[*bj]; 2290 } 2291 ba++; 2292 bj++; 2293 } 2294 } 2295 2296 PetscCall(VecGetArrayWrite(v, &a)); 2297 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2298 for (r = 0; r < m; ++r) { 2299 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2300 a[r] = diagA[r]; 2301 if (idx) idx[r] = cstart + diagIdx[r]; 2302 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2303 a[r] = diagA[r]; 2304 if (idx) { 2305 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2306 idx[r] = cstart + diagIdx[r]; 2307 } else idx[r] = offdiagIdx[r]; 2308 } 2309 } else { 2310 a[r] = offdiagA[r]; 2311 if (idx) idx[r] = offdiagIdx[r]; 2312 } 2313 } 2314 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2315 PetscCall(VecRestoreArrayWrite(v, &a)); 2316 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2317 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2318 PetscCall(VecDestroy(&diagV)); 2319 PetscCall(VecDestroy(&offdiagV)); 2320 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2321 PetscFunctionReturn(PETSC_SUCCESS); 2322 } 2323 2324 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2325 { 2326 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2327 PetscInt m = A->rmap->n, n = A->cmap->n; 2328 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
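/* cmap (== mat->garray) maps the compressed local column indices of the
   off-diagonal block B back to global column numbers */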
2329 PetscInt *cmap = mat->garray; 2330 PetscInt *diagIdx, *offdiagIdx; 2331 Vec diagV, offdiagV; 2332 PetscScalar *a, *diagA, *offdiagA; 2333 const PetscScalar *ba, *bav; 2334 PetscInt r, j, col, ncols, *bi, *bj; 2335 Mat B = mat->B; 2336 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2337 2338 PetscFunctionBegin; 2339 /* When a process holds entire A and other processes have no entry */ 2340 if (A->cmap->N == n) { 2341 PetscCall(VecGetArrayWrite(v, &diagA)); 2342 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2343 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2344 PetscCall(VecDestroy(&diagV)); 2345 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2346 PetscFunctionReturn(PETSC_SUCCESS); 2347 } else if (n == 0) { 2348 if (m) { 2349 PetscCall(VecGetArrayWrite(v, &a)); 2350 for (r = 0; r < m; r++) { 2351 a[r] = PETSC_MAX_REAL; 2352 if (idx) idx[r] = -1; 2353 } 2354 PetscCall(VecRestoreArrayWrite(v, &a)); 2355 } 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } 2358 2359 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2360 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2361 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2362 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2363 2364 /* Get offdiagIdx[] for implicit 0.0 */ 2365 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2366 ba = bav; 2367 bi = b->i; 2368 bj = b->j; 2369 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2370 for (r = 0; r < m; r++) { 2371 ncols = bi[r + 1] - bi[r]; 2372 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2373 offdiagA[r] = *ba; 2374 offdiagIdx[r] = cmap[0]; 2375 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2376 offdiagA[r] = 0.0; 2377 2378 /* Find first hole in the cmap */ 2379 for (j = 0; j < ncols; j++) { 2380 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2381 if (col > j && j < cstart) { 2382 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2383 break; 2384 } else if (col > j + n && j >= cstart) { 2385 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2386 break; 2387 } 2388 } 2389 if (j == ncols && ncols < A->cmap->N - n) { 2390 /* a hole is outside compressed Bcols */ 2391 if (ncols == 0) { 2392 if (cstart) { 2393 offdiagIdx[r] = 0; 2394 } else offdiagIdx[r] = cend; 2395 } else { /* ncols > 0 */ 2396 offdiagIdx[r] = cmap[ncols - 1] + 1; 2397 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2398 } 2399 } 2400 } 2401 2402 for (j = 0; j < ncols; j++) { 2403 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2404 offdiagA[r] = *ba; 2405 offdiagIdx[r] = cmap[*bj]; 2406 } 2407 ba++; 2408 bj++; 2409 } 2410 } 2411 2412 PetscCall(VecGetArrayWrite(v, &a)); 2413 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2414 for (r = 0; r < m; ++r) { 2415 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 if (idx) idx[r] = cstart + diagIdx[r]; 2418 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) { 2421 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2422 idx[r] = cstart + diagIdx[r]; 2423 } else idx[r] = offdiagIdx[r]; 2424 } 2425 } else { 2426 a[r] = offdiagA[r]; 2427 if (idx) idx[r] = offdiagIdx[r]; 2428 } 2429 } 2430 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2431 PetscCall(VecRestoreArrayWrite(v, &a)); 2432 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2433 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2434 PetscCall(VecDestroy(&diagV)); 2435 
PetscCall(VecDestroy(&offdiagV)); 2436 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2437 PetscFunctionReturn(PETSC_SUCCESS); 2438 } 2439 2440 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2441 { 2442 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2443 PetscInt m = A->rmap->n, n = A->cmap->n; 2444 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2445 PetscInt *cmap = mat->garray; 2446 PetscInt *diagIdx, *offdiagIdx; 2447 Vec diagV, offdiagV; 2448 PetscScalar *a, *diagA, *offdiagA; 2449 const PetscScalar *ba, *bav; 2450 PetscInt r, j, col, ncols, *bi, *bj; 2451 Mat B = mat->B; 2452 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2453 2454 PetscFunctionBegin; 2455 /* When a process holds entire A and other processes have no entry */ 2456 if (A->cmap->N == n) { 2457 PetscCall(VecGetArrayWrite(v, &diagA)); 2458 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2459 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2460 PetscCall(VecDestroy(&diagV)); 2461 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2462 PetscFunctionReturn(PETSC_SUCCESS); 2463 } else if (n == 0) { 2464 if (m) { 2465 PetscCall(VecGetArrayWrite(v, &a)); 2466 for (r = 0; r < m; r++) { 2467 a[r] = PETSC_MIN_REAL; 2468 if (idx) idx[r] = -1; 2469 } 2470 PetscCall(VecRestoreArrayWrite(v, &a)); 2471 } 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } 2474 2475 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2476 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2477 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2478 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2479 2480 /* Get offdiagIdx[] for implicit 0.0 */ 2481 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2482 ba = bav; 2483 bi = b->i; 2484 bj = b->j; 2485 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2486 for (r = 0; r < m; r++) { 2487 ncols = bi[r + 1] - bi[r]; 2488 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2489 offdiagA[r] = *ba; 2490 offdiagIdx[r] = cmap[0]; 2491 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2492 offdiagA[r] = 0.0; 2493 2494 /* Find first hole in the cmap */ 2495 for (j = 0; j < ncols; j++) { 2496 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2497 if (col > j && j < cstart) { 2498 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2499 break; 2500 } else if (col > j + n && j >= cstart) { 2501 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2502 break; 2503 } 2504 } 2505 if (j == ncols && ncols < A->cmap->N - n) { 2506 /* a hole is outside compressed Bcols */ 2507 if (ncols == 0) { 2508 if (cstart) { 2509 offdiagIdx[r] = 0; 2510 } else offdiagIdx[r] = cend; 2511 } else { /* ncols > 0 */ 2512 offdiagIdx[r] = cmap[ncols - 1] + 1; 2513 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2514 } 2515 } 2516 } 2517 2518 for (j = 0; j < ncols; j++) { 2519 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2520 offdiagA[r] = *ba; 2521 offdiagIdx[r] = cmap[*bj]; 2522 } 2523 ba++; 2524 bj++; 2525 } 2526 } 2527 2528 PetscCall(VecGetArrayWrite(v, &a)); 2529 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2530 for (r = 0; r < m; ++r) { 2531 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2532 a[r] = diagA[r]; 2533 if (idx) idx[r] = cstart + diagIdx[r]; 2534 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2535 a[r] = diagA[r]; 2536 if (idx) { 2537 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2538 idx[r] = cstart + diagIdx[r]; 2539 } else idx[r] = offdiagIdx[r]; 2540 } 2541 } 
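/* off-diagonal candidate is strictly larger: take its value and global column index */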
else { 2542 a[r] = offdiagA[r]; 2543 if (idx) idx[r] = offdiagIdx[r]; 2544 } 2545 } 2546 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2547 PetscCall(VecRestoreArrayWrite(v, &a)); 2548 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2549 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2550 PetscCall(VecDestroy(&diagV)); 2551 PetscCall(VecDestroy(&offdiagV)); 2552 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2553 PetscFunctionReturn(PETSC_SUCCESS); 2554 } 2555 2556 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2557 { 2558 Mat *dummy; 2559 2560 PetscFunctionBegin; 2561 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2562 *newmat = *dummy; 2563 PetscCall(PetscFree(dummy)); 2564 PetscFunctionReturn(PETSC_SUCCESS); 2565 } 2566 2567 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2568 { 2569 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2570 2571 PetscFunctionBegin; 2572 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2573 A->factorerrortype = a->A->factorerrortype; 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2578 { 2579 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2580 2581 PetscFunctionBegin; 2582 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2583 PetscCall(MatSetRandom(aij->A, rctx)); 2584 if (x->assembled) { 2585 PetscCall(MatSetRandom(aij->B, rctx)); 2586 } else { 2587 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2588 } 2589 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2590 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2591 PetscFunctionReturn(PETSC_SUCCESS); 2592 } 2593 2594 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2595 { 2596 PetscFunctionBegin; 2597 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2598 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2599 PetscFunctionReturn(PETSC_SUCCESS); 2600 } 2601 2602 /*@ 2603 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2604 2605 Not Collective 2606 2607 Input Parameter: 2608 . A - the matrix 2609 2610 Output Parameter: 2611 . 
nz - the number of nonzeros 2612 2613 Level: advanced 2614 2615 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2616 @*/ 2617 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2618 { 2619 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2620 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2621 PetscBool isaij; 2622 2623 PetscFunctionBegin; 2624 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2625 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2626 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2627 PetscFunctionReturn(PETSC_SUCCESS); 2628 } 2629 2630 /*@ 2631 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2632 2633 Collective 2634 2635 Input Parameters: 2636 + A - the matrix 2637 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2638 2639 Level: advanced 2640 2641 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2642 @*/ 2643 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2644 { 2645 PetscFunctionBegin; 2646 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2651 { 2652 PetscBool sc = PETSC_FALSE, flg; 2653 2654 PetscFunctionBegin; 2655 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2656 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2657 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2658 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2659 PetscOptionsHeadEnd(); 2660 PetscFunctionReturn(PETSC_SUCCESS); 2661 } 2662 2663 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2664 { 2665 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2666 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2667 2668 PetscFunctionBegin; 2669 if (!Y->preallocated) { 2670 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2671 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2672 PetscInt nonew = aij->nonew; 2673 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2674 aij->nonew = nonew; 2675 } 2676 PetscCall(MatShift_Basic(Y, a)); 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2681 { 2682 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2683 2684 PetscFunctionBegin; 2685 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2686 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2687 if (d) { 2688 PetscInt rstart; 2689 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2690 *d += rstart; 2691 } 2692 PetscFunctionReturn(PETSC_SUCCESS); 2693 } 2694 2695 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2696 { 2697 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2698 2699 PetscFunctionBegin; 2700 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2705 { 2706 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2707 2708 PetscFunctionBegin; 2709 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2710 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2715 MatGetRow_MPIAIJ, 2716 MatRestoreRow_MPIAIJ, 2717 MatMult_MPIAIJ, 2718 /* 4*/ MatMultAdd_MPIAIJ, 2719 MatMultTranspose_MPIAIJ, 2720 MatMultTransposeAdd_MPIAIJ, 2721 NULL, 2722 NULL, 2723 NULL, 2724 /*10*/ NULL, 2725 NULL, 2726 NULL, 2727 MatSOR_MPIAIJ, 2728 MatTranspose_MPIAIJ, 2729 /*15*/ MatGetInfo_MPIAIJ, 2730 MatEqual_MPIAIJ, 2731 MatGetDiagonal_MPIAIJ, 2732 MatDiagonalScale_MPIAIJ, 2733 MatNorm_MPIAIJ, 2734 /*20*/ MatAssemblyBegin_MPIAIJ, 2735 MatAssemblyEnd_MPIAIJ, 2736 MatSetOption_MPIAIJ, 2737 MatZeroEntries_MPIAIJ, 2738 /*24*/ MatZeroRows_MPIAIJ, 2739 NULL, 2740 NULL, 2741 NULL, 2742 NULL, 2743 /*29*/ MatSetUp_MPI_Hash, 2744 NULL, 2745 NULL, 2746 MatGetDiagonalBlock_MPIAIJ, 2747 NULL, 2748 /*34*/ MatDuplicate_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*39*/ MatAXPY_MPIAIJ, 2754 MatCreateSubMatrices_MPIAIJ, 2755 MatIncreaseOverlap_MPIAIJ, 2756 MatGetValues_MPIAIJ, 2757 MatCopy_MPIAIJ, 2758 /*44*/ MatGetRowMax_MPIAIJ, 2759 MatScale_MPIAIJ, 2760 MatShift_MPIAIJ, 2761 MatDiagonalSet_MPIAIJ, 2762 MatZeroRowsColumns_MPIAIJ, 2763 /*49*/ MatSetRandom_MPIAIJ, 2764 MatGetRowIJ_MPIAIJ, 2765 MatRestoreRowIJ_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*54*/ MatFDColoringCreate_MPIXAIJ, 2769 NULL, 2770 MatSetUnfactored_MPIAIJ, 2771 MatPermute_MPIAIJ, 2772 NULL, 2773 /*59*/ MatCreateSubMatrix_MPIAIJ, 2774 MatDestroy_MPIAIJ, 2775 MatView_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*64*/ NULL, 2779 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2780 NULL, 2781 NULL, 2782 NULL, 2783 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2784 MatGetRowMinAbs_MPIAIJ, 2785 NULL, 2786 NULL, 2787 NULL, 2788 NULL, 2789 /*75*/ MatFDColoringApply_AIJ, 2790 MatSetFromOptions_MPIAIJ, 2791 NULL, 2792 NULL, 2793 MatFindZeroDiagonals_MPIAIJ, 2794 /*80*/ NULL, 2795 NULL, 2796 NULL, 2797 /*83*/ MatLoad_MPIAIJ, 2798 NULL, 2799 NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 /*89*/ NULL, 2804 NULL, 2805 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2806 NULL, 2807 NULL, 2808 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2809 NULL, 2810 NULL, 2811 
NULL, 2812 MatBindToCPU_MPIAIJ, 2813 /*99*/ MatProductSetFromOptions_MPIAIJ, 2814 NULL, 2815 NULL, 2816 MatConjugate_MPIAIJ, 2817 NULL, 2818 /*104*/ MatSetValuesRow_MPIAIJ, 2819 MatRealPart_MPIAIJ, 2820 MatImaginaryPart_MPIAIJ, 2821 NULL, 2822 NULL, 2823 /*109*/ NULL, 2824 NULL, 2825 MatGetRowMin_MPIAIJ, 2826 NULL, 2827 MatMissingDiagonal_MPIAIJ, 2828 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2829 NULL, 2830 MatGetGhosts_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2834 NULL, 2835 NULL, 2836 NULL, 2837 MatGetMultiProcBlock_MPIAIJ, 2838 /*124*/ MatFindNonzeroRows_MPIAIJ, 2839 MatGetColumnReductions_MPIAIJ, 2840 MatInvertBlockDiagonal_MPIAIJ, 2841 MatInvertVariableBlockDiagonal_MPIAIJ, 2842 MatCreateSubMatricesMPI_MPIAIJ, 2843 /*129*/ NULL, 2844 NULL, 2845 NULL, 2846 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2847 NULL, 2848 /*134*/ NULL, 2849 NULL, 2850 NULL, 2851 NULL, 2852 NULL, 2853 /*139*/ MatSetBlockSizes_MPIAIJ, 2854 NULL, 2855 NULL, 2856 MatFDColoringSetUp_MPIXAIJ, 2857 MatFindOffBlockDiagonalEntries_MPIAIJ, 2858 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2859 /*145*/ NULL, 2860 NULL, 2861 NULL, 2862 MatCreateGraph_Simple_AIJ, 2863 NULL, 2864 /*150*/ NULL, 2865 MatEliminateZeros_MPIAIJ, 2866 MatGetRowSumAbs_MPIAIJ, 2867 NULL, 2868 NULL, 2869 NULL}; 2870 2871 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2872 { 2873 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2874 2875 PetscFunctionBegin; 2876 PetscCall(MatStoreValues(aij->A)); 2877 PetscCall(MatStoreValues(aij->B)); 2878 PetscFunctionReturn(PETSC_SUCCESS); 2879 } 2880 2881 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2882 { 2883 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2884 2885 PetscFunctionBegin; 2886 PetscCall(MatRetrieveValues(aij->A)); 2887 PetscCall(MatRetrieveValues(aij->B)); 2888 PetscFunctionReturn(PETSC_SUCCESS); 2889 } 2890 2891 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2892 { 2893 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2894 PetscMPIInt size; 2895 2896 PetscFunctionBegin; 2897 if (B->hash_active) { 2898 B->ops[0] = b->cops; 2899 B->hash_active = PETSC_FALSE; 2900 } 2901 PetscCall(PetscLayoutSetUp(B->rmap)); 2902 PetscCall(PetscLayoutSetUp(B->cmap)); 2903 2904 #if defined(PETSC_USE_CTABLE) 2905 PetscCall(PetscHMapIDestroy(&b->colmap)); 2906 #else 2907 PetscCall(PetscFree(b->colmap)); 2908 #endif 2909 PetscCall(PetscFree(b->garray)); 2910 PetscCall(VecDestroy(&b->lvec)); 2911 PetscCall(VecScatterDestroy(&b->Mvctx)); 2912 2913 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2914 2915 MatSeqXAIJGetOptions_Private(b->B); 2916 PetscCall(MatDestroy(&b->B)); 2917 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2918 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2919 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2920 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2921 MatSeqXAIJRestoreOptions_Private(b->B); 2922 2923 MatSeqXAIJGetOptions_Private(b->A); 2924 PetscCall(MatDestroy(&b->A)); 2925 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2926 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2927 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2928 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2929 MatSeqXAIJRestoreOptions_Private(b->A); 2930 2931 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2932 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2933 B->preallocated = PETSC_TRUE; 2934 B->was_assembled = PETSC_FALSE; 2935 B->assembled = PETSC_FALSE; 2936 PetscFunctionReturn(PETSC_SUCCESS); 2937 } 2938 2939 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2940 { 2941 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2942 2943 PetscFunctionBegin; 2944 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2945 PetscCall(PetscLayoutSetUp(B->rmap)); 2946 PetscCall(PetscLayoutSetUp(B->cmap)); 2947 2948 #if defined(PETSC_USE_CTABLE) 2949 PetscCall(PetscHMapIDestroy(&b->colmap)); 2950 #else 2951 PetscCall(PetscFree(b->colmap)); 2952 #endif 2953 PetscCall(PetscFree(b->garray)); 2954 PetscCall(VecDestroy(&b->lvec)); 2955 PetscCall(VecScatterDestroy(&b->Mvctx)); 2956 2957 PetscCall(MatResetPreallocation(b->A)); 2958 PetscCall(MatResetPreallocation(b->B)); 2959 B->preallocated = PETSC_TRUE; 2960 B->was_assembled = PETSC_FALSE; 2961 B->assembled = PETSC_FALSE; 2962 PetscFunctionReturn(PETSC_SUCCESS); 2963 } 2964 2965 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2966 { 2967 Mat mat; 2968 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2969 2970 PetscFunctionBegin; 2971 *newmat = NULL; 2972 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2973 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2974 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2975 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2976 a = (Mat_MPIAIJ *)mat->data; 2977 2978 mat->factortype = matin->factortype; 2979 mat->assembled = matin->assembled; 2980 mat->insertmode = NOT_SET_VALUES; 2981 2982 a->size = oldmat->size; 2983 a->rank = oldmat->rank; 2984 a->donotstash = oldmat->donotstash; 2985 a->roworiented = oldmat->roworiented; 2986 a->rowindices = NULL; 2987 a->rowvalues = NULL; 2988 a->getrowactive = PETSC_FALSE; 2989 2990 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2991 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2992 if (matin->hash_active) { 2993 PetscCall(MatSetUp(mat)); 2994 } else { 2995 mat->preallocated = matin->preallocated; 2996 if (oldmat->colmap) { 2997 #if defined(PETSC_USE_CTABLE) 2998 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2999 #else 3000 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3001 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3002 #endif 3003 } else a->colmap = NULL; 3004 if (oldmat->garray) { 3005 PetscInt len; 3006 len = oldmat->B->cmap->n; 3007 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3008 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3009 } else a->garray = NULL; 3010 3011 /* It may happen MatDuplicate is called with a non-assembled matrix 3012 In fact, MatDuplicate only requires the matrix to be preallocated 3013 This may happen inside a DMCreateMatrix_Shell */ 3014 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3015 if (oldmat->Mvctx) { 3016 a->Mvctx = oldmat->Mvctx; 3017 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3018 } 3019 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3020 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3021 } 3022 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3023 *newmat = mat; 3024 PetscFunctionReturn(PETSC_SUCCESS); 3025 } 3026 3027 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3028 { 3029 PetscBool isbinary, ishdf5; 3030 3031 PetscFunctionBegin; 3032 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3033 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3034 /* force binary viewer to load .info file if it has not yet done so */ 3035 PetscCall(PetscViewerSetUp(viewer)); 3036 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3037 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3038 if (isbinary) { 3039 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3040 } else if (ishdf5) { 3041 #if defined(PETSC_HAVE_HDF5) 3042 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3043 #else 3044 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3045 #endif 3046 } else { 3047 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3048 } 3049 PetscFunctionReturn(PETSC_SUCCESS); 3050 } 3051 3052 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3053 { 3054 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3055 PetscInt *rowidxs, *colidxs; 3056 PetscScalar *matvals; 3057 3058 PetscFunctionBegin; 3059 PetscCall(PetscViewerSetUp(viewer)); 3060 3061 /* read in matrix header */ 3062 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3063 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3064 M = header[1]; 3065 N = header[2]; 3066 nz = header[3]; 3067 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3068 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3069 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3070 3071 /* set block sizes from the viewer's .info file */ 3072 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3073 /* set global sizes if not set already */ 3074 if (mat->rmap->N < 0) mat->rmap->N = M; 3075 if (mat->cmap->N < 0) mat->cmap->N = N; 3076 PetscCall(PetscLayoutSetUp(mat->rmap)); 3077 PetscCall(PetscLayoutSetUp(mat->cmap)); 3078 3079 /* check if the matrix sizes are correct */ 3080 PetscCall(MatGetSize(mat, &rows, &cols)); 3081 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3082 3083 /* read in row lengths and build row indices */ 3084 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3085 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3086 
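  /* each rank reads its m local row lengths into rowidxs[1..m]; the prefix sum below converts them to CSR row offsets, so rowidxs[m] ends up as the number of locally stored nonzeros */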
PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3087 rowidxs[0] = 0; 3088 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3089 if (nz != PETSC_INT_MAX) { 3090 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3091 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3092 } 3093 3094 /* read in column indices and matrix values */ 3095 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3096 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3097 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3098 /* store matrix indices and values */ 3099 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3100 PetscCall(PetscFree(rowidxs)); 3101 PetscCall(PetscFree2(colidxs, matvals)); 3102 PetscFunctionReturn(PETSC_SUCCESS); 3103 } 3104 3105 /* Not scalable because of ISAllGather() unless getting all columns. */ 3106 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3107 { 3108 IS iscol_local; 3109 PetscBool isstride; 3110 PetscMPIInt lisstride = 0, gisstride; 3111 3112 PetscFunctionBegin; 3113 /* check if we are grabbing all columns*/ 3114 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3115 3116 if (isstride) { 3117 PetscInt start, len, mstart, mlen; 3118 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3119 PetscCall(ISGetLocalSize(iscol, &len)); 3120 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3121 if (mstart == start && mlen - mstart == len) lisstride = 1; 3122 } 3123 3124 PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3125 if (gisstride) { 3126 PetscInt N; 3127 PetscCall(MatGetSize(mat, NULL, &N)); 3128 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3129 PetscCall(ISSetIdentity(iscol_local)); 3130 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3131 } else { 3132 PetscInt cbs; 3133 PetscCall(ISGetBlockSize(iscol, &cbs)); 3134 PetscCall(ISAllGather(iscol, &iscol_local)); 3135 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3136 } 3137 3138 *isseq = iscol_local; 3139 PetscFunctionReturn(PETSC_SUCCESS); 3140 } 3141 3142 /* 3143 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3144 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3145 3146 Input Parameters: 3147 + mat - matrix 3148 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3149 i.e., mat->rstart <= isrow[i] < mat->rend 3150 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3151 i.e., mat->cstart <= iscol[i] < mat->cend 3152 3153 Output Parameters: 3154 + isrow_d - sequential row index set for retrieving mat->A 3155 . iscol_d - sequential column index set for retrieving mat->A 3156 . 
iscol_o - sequential column index set for retrieving mat->B 3157 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3158 */ 3159 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3160 { 3161 Vec x, cmap; 3162 const PetscInt *is_idx; 3163 PetscScalar *xarray, *cmaparray; 3164 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3165 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3166 Mat B = a->B; 3167 Vec lvec = a->lvec, lcmap; 3168 PetscInt i, cstart, cend, Bn = B->cmap->N; 3169 MPI_Comm comm; 3170 VecScatter Mvctx = a->Mvctx; 3171 3172 PetscFunctionBegin; 3173 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3174 PetscCall(ISGetLocalSize(iscol, &ncols)); 3175 3176 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3177 PetscCall(MatCreateVecs(mat, &x, NULL)); 3178 PetscCall(VecSet(x, -1.0)); 3179 PetscCall(VecDuplicate(x, &cmap)); 3180 PetscCall(VecSet(cmap, -1.0)); 3181 3182 /* Get start indices */ 3183 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3184 isstart -= ncols; 3185 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3186 3187 PetscCall(ISGetIndices(iscol, &is_idx)); 3188 PetscCall(VecGetArray(x, &xarray)); 3189 PetscCall(VecGetArray(cmap, &cmaparray)); 3190 PetscCall(PetscMalloc1(ncols, &idx)); 3191 for (i = 0; i < ncols; i++) { 3192 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3193 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3194 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3195 } 3196 PetscCall(VecRestoreArray(x, &xarray)); 3197 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3198 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3199 3200 /* Get iscol_d */ 3201 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3202 PetscCall(ISGetBlockSize(iscol, &i)); 3203 PetscCall(ISSetBlockSize(*iscol_d, i)); 3204 3205 /* Get isrow_d */ 3206 PetscCall(ISGetLocalSize(isrow, &m)); 3207 rstart = mat->rmap->rstart; 3208 PetscCall(PetscMalloc1(m, &idx)); 3209 PetscCall(ISGetIndices(isrow, &is_idx)); 3210 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3211 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3212 3213 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3214 PetscCall(ISGetBlockSize(isrow, &i)); 3215 PetscCall(ISSetBlockSize(*isrow_d, i)); 3216 3217 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3218 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3219 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3220 3221 PetscCall(VecDuplicate(lvec, &lcmap)); 3222 3223 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3224 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3225 3226 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3227 /* off-process column indices */ 3228 count = 0; 3229 PetscCall(PetscMalloc1(Bn, &idx)); 3230 PetscCall(PetscMalloc1(Bn, &cmap1)); 3231 3232 PetscCall(VecGetArray(lvec, &xarray)); 3233 PetscCall(VecGetArray(lcmap, &cmaparray)); 3234 for (i = 0; i < Bn; i++) { 3235 if (PetscRealPart(xarray[i]) > -1.0) { 3236 idx[count] = i; /* local column index in off-diagonal part B */ 3237 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3238 count++; 
3239 } 3240 } 3241 PetscCall(VecRestoreArray(lvec, &xarray)); 3242 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3243 3244 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3245 /* cannot ensure iscol_o has same blocksize as iscol! */ 3246 3247 PetscCall(PetscFree(idx)); 3248 *garray = cmap1; 3249 3250 PetscCall(VecDestroy(&x)); 3251 PetscCall(VecDestroy(&cmap)); 3252 PetscCall(VecDestroy(&lcmap)); 3253 PetscFunctionReturn(PETSC_SUCCESS); 3254 } 3255 3256 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3257 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3258 { 3259 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3260 Mat M = NULL; 3261 MPI_Comm comm; 3262 IS iscol_d, isrow_d, iscol_o; 3263 Mat Asub = NULL, Bsub = NULL; 3264 PetscInt n; 3265 3266 PetscFunctionBegin; 3267 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3268 3269 if (call == MAT_REUSE_MATRIX) { 3270 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3271 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3272 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3273 3274 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3275 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3276 3277 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3278 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3279 3280 /* Update diagonal and off-diagonal portions of submat */ 3281 asub = (Mat_MPIAIJ *)(*submat)->data; 3282 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3283 PetscCall(ISGetLocalSize(iscol_o, &n)); 3284 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3285 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3286 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3287 3288 } else { /* call == MAT_INITIAL_MATRIX) */ 3289 PetscInt *garray; 3290 PetscInt BsubN; 3291 3292 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3293 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3294 3295 /* Create local submatrices Asub and Bsub */ 3296 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3297 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3298 3299 /* Create submatrix M */ 3300 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3301 3302 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3303 asub = (Mat_MPIAIJ *)M->data; 3304 3305 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3306 n = asub->B->cmap->N; 3307 if (BsubN > n) { 3308 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3309 const PetscInt *idx; 3310 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3311 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3312 3313 PetscCall(PetscMalloc1(n, &idx_new)); 3314 j = 0; 3315 PetscCall(ISGetIndices(iscol_o, &idx)); 3316 for (i = 0; i < n; i++) { 3317 if (j >= BsubN) break; 3318 while (subgarray[i] > garray[j]) j++; 3319 3320 if (subgarray[i] == garray[j]) { 3321 idx_new[i] = idx[j++]; 3322 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3323 } 3324 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3325 3326 PetscCall(ISDestroy(&iscol_o)); 3327 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3328 3329 } else if (BsubN < n) { 3330 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3331 } 3332 3333 PetscCall(PetscFree(garray)); 3334 *submat = M; 3335 3336 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3337 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3338 PetscCall(ISDestroy(&isrow_d)); 3339 3340 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3341 PetscCall(ISDestroy(&iscol_d)); 3342 3343 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3344 PetscCall(ISDestroy(&iscol_o)); 3345 } 3346 PetscFunctionReturn(PETSC_SUCCESS); 3347 } 3348 3349 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3350 { 3351 IS iscol_local = NULL, isrow_d; 3352 PetscInt csize; 3353 PetscInt n, i, j, start, end; 3354 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3355 MPI_Comm comm; 3356 3357 PetscFunctionBegin; 3358 /* If isrow has same processor distribution as mat, 3359 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3360 if (call == MAT_REUSE_MATRIX) { 3361 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3362 if (isrow_d) { 3363 sameRowDist = PETSC_TRUE; 3364 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3365 } else { 3366 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3367 if (iscol_local) { 3368 sameRowDist = PETSC_TRUE; 3369 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3370 } 3371 } 3372 } else { 3373 /* Check if isrow has same processor distribution as mat */ 3374 sameDist[0] = PETSC_FALSE; 3375 
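    /* isrow has the same distribution as mat when every locally owned index of isrow lies inside this rank's row ownership range [start, end) */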
PetscCall(ISGetLocalSize(isrow, &n)); 3376 if (!n) { 3377 sameDist[0] = PETSC_TRUE; 3378 } else { 3379 PetscCall(ISGetMinMax(isrow, &i, &j)); 3380 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3381 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3382 } 3383 3384 /* Check if iscol has same processor distribution as mat */ 3385 sameDist[1] = PETSC_FALSE; 3386 PetscCall(ISGetLocalSize(iscol, &n)); 3387 if (!n) { 3388 sameDist[1] = PETSC_TRUE; 3389 } else { 3390 PetscCall(ISGetMinMax(iscol, &i, &j)); 3391 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3392 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3393 } 3394 3395 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3396 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3397 sameRowDist = tsameDist[0]; 3398 } 3399 3400 if (sameRowDist) { 3401 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3402 /* isrow and iscol have same processor distribution as mat */ 3403 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3404 PetscFunctionReturn(PETSC_SUCCESS); 3405 } else { /* sameRowDist */ 3406 /* isrow has same processor distribution as mat */ 3407 if (call == MAT_INITIAL_MATRIX) { 3408 PetscBool sorted; 3409 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3410 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3411 PetscCall(ISGetSize(iscol, &i)); 3412 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3413 3414 PetscCall(ISSorted(iscol_local, &sorted)); 3415 if (sorted) { 3416 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3417 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3418 PetscFunctionReturn(PETSC_SUCCESS); 3419 } 3420 } else { /* call == MAT_REUSE_MATRIX */ 3421 IS iscol_sub; 3422 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3423 if (iscol_sub) { 3424 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3425 PetscFunctionReturn(PETSC_SUCCESS); 3426 } 3427 } 3428 } 3429 } 3430 3431 /* General case: iscol -> iscol_local which has global size of iscol */ 3432 if (call == MAT_REUSE_MATRIX) { 3433 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3434 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3435 } else { 3436 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3437 } 3438 3439 PetscCall(ISGetLocalSize(iscol, &csize)); 3440 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3441 3442 if (call == MAT_INITIAL_MATRIX) { 3443 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3444 PetscCall(ISDestroy(&iscol_local)); 3445 } 3446 PetscFunctionReturn(PETSC_SUCCESS); 3447 } 3448 3449 /*@C 3450 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3451 and "off-diagonal" part of the matrix in CSR format. 3452 3453 Collective 3454 3455 Input Parameters: 3456 + comm - MPI communicator 3457 . A - "diagonal" portion of matrix 3458 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3459 - garray - global index of `B` columns 3460 3461 Output Parameter: 3462 . mat - the matrix, with input `A` as its local diagonal matrix 3463 3464 Level: advanced 3465 3466 Notes: 3467 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3468 3469 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3470 3471 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3472 @*/ 3473 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3474 { 3475 Mat_MPIAIJ *maij; 3476 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3477 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3478 const PetscScalar *oa; 3479 Mat Bnew; 3480 PetscInt m, n, N; 3481 MatType mpi_mat_type; 3482 3483 PetscFunctionBegin; 3484 PetscCall(MatCreate(comm, mat)); 3485 PetscCall(MatGetSize(A, &m, &n)); 3486 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3487 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3488 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3489 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3490 3491 /* Get global columns of mat */ 3492 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3493 3494 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3495 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3496 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3497 PetscCall(MatSetType(*mat, mpi_mat_type)); 3498 3499 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3500 maij = (Mat_MPIAIJ *)(*mat)->data; 3501 3502 (*mat)->preallocated = PETSC_TRUE; 3503 3504 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3505 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3506 3507 /* Set A as diagonal portion of *mat */ 3508 maij->A = A; 3509 3510 nz = oi[m]; 3511 for (i = 0; i < nz; i++) { 3512 col = oj[i]; 3513 oj[i] = garray[col]; 3514 } 3515 3516 /* Set Bnew as off-diagonal portion of *mat */ 3517 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3518 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3519 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3520 bnew = (Mat_SeqAIJ *)Bnew->data; 3521 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3522 maij->B = Bnew; 3523 3524 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3525 3526 b->free_a = PETSC_FALSE; 3527 b->free_ij = PETSC_FALSE; 3528 PetscCall(MatDestroy(&B)); 3529 3530 bnew->free_a = PETSC_TRUE; 3531 bnew->free_ij = PETSC_TRUE; 3532 3533 /* condense columns of maij->B */ 3534 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3535 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3536 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3537 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3538 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3539 PetscFunctionReturn(PETSC_SUCCESS); 3540 } 3541 3542 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3543 3544 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3545 { 3546 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3547 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3548 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3549 Mat M, Msub, B = a->B; 3550 MatScalar *aa; 3551 Mat_SeqAIJ *aij; 3552 PetscInt *garray = a->garray, *colsub, Ncols; 3553 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3554 IS iscol_sub, iscmap; 3555 const PetscInt *is_idx, *cmap; 3556 PetscBool allcolumns = PETSC_FALSE; 3557 MPI_Comm comm; 3558 3559 PetscFunctionBegin; 3560 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3561 if (call == MAT_REUSE_MATRIX) { 3562 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3563 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3564 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3565 3566 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3567 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3568 3569 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3570 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3571 3572 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3573 3574 } else { /* call == MAT_INITIAL_MATRIX) */ 3575 PetscBool 
flg; 3576 3577 PetscCall(ISGetLocalSize(iscol, &n)); 3578 PetscCall(ISGetSize(iscol, &Ncols)); 3579 3580 /* (1) iscol -> nonscalable iscol_local */ 3581 /* Check for special case: each processor gets entire matrix columns */ 3582 PetscCall(ISIdentity(iscol_local, &flg)); 3583 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3584 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3585 if (allcolumns) { 3586 iscol_sub = iscol_local; 3587 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3588 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3589 3590 } else { 3591 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3592 PetscInt *idx, *cmap1, k; 3593 PetscCall(PetscMalloc1(Ncols, &idx)); 3594 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3595 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3596 count = 0; 3597 k = 0; 3598 for (i = 0; i < Ncols; i++) { 3599 j = is_idx[i]; 3600 if (j >= cstart && j < cend) { 3601 /* diagonal part of mat */ 3602 idx[count] = j; 3603 cmap1[count++] = i; /* column index in submat */ 3604 } else if (Bn) { 3605 /* off-diagonal part of mat */ 3606 if (j == garray[k]) { 3607 idx[count] = j; 3608 cmap1[count++] = i; /* column index in submat */ 3609 } else if (j > garray[k]) { 3610 while (j > garray[k] && k < Bn - 1) k++; 3611 if (j == garray[k]) { 3612 idx[count] = j; 3613 cmap1[count++] = i; /* column index in submat */ 3614 } 3615 } 3616 } 3617 } 3618 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3619 3620 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3621 PetscCall(ISGetBlockSize(iscol, &cbs)); 3622 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3623 3624 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3625 } 3626 3627 /* (3) Create sequential Msub */ 3628 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3629 } 3630 3631 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3632 aij = (Mat_SeqAIJ *)Msub->data; 3633 ii = aij->i; 3634 PetscCall(ISGetIndices(iscmap, &cmap)); 3635 3636 /* 3637 m - number of local rows 3638 Ncols - number of columns (same on all processors) 3639 rstart - first row in new global matrix generated 3640 */ 3641 PetscCall(MatGetSize(Msub, &m, NULL)); 3642 3643 if (call == MAT_INITIAL_MATRIX) { 3644 /* (4) Create parallel newmat */ 3645 PetscMPIInt rank, size; 3646 PetscInt csize; 3647 3648 PetscCallMPI(MPI_Comm_size(comm, &size)); 3649 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3650 3651 /* 3652 Determine the number of non-zeros in the diagonal and off-diagonal 3653 portions of the matrix in order to do correct preallocation 3654 */ 3655 3656 /* first get start and end of "diagonal" columns */ 3657 PetscCall(ISGetLocalSize(iscol, &csize)); 3658 if (csize == PETSC_DECIDE) { 3659 PetscCall(ISGetSize(isrow, &mglobal)); 3660 if (mglobal == Ncols) { /* square matrix */ 3661 nlocal = m; 3662 } else { 3663 nlocal = Ncols / size + ((Ncols % size) > rank); 3664 } 3665 } else { 3666 nlocal = csize; 3667 } 3668 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3669 rstart = rend - nlocal; 3670 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3671 3672 /* next, 
compute all the lengths */ 3673 jj = aij->j; 3674 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3675 olens = dlens + m; 3676 for (i = 0; i < m; i++) { 3677 jend = ii[i + 1] - ii[i]; 3678 olen = 0; 3679 dlen = 0; 3680 for (j = 0; j < jend; j++) { 3681 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3682 else dlen++; 3683 jj++; 3684 } 3685 olens[i] = olen; 3686 dlens[i] = dlen; 3687 } 3688 3689 PetscCall(ISGetBlockSize(isrow, &bs)); 3690 PetscCall(ISGetBlockSize(iscol, &cbs)); 3691 3692 PetscCall(MatCreate(comm, &M)); 3693 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3694 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3695 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3696 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3697 PetscCall(PetscFree(dlens)); 3698 3699 } else { /* call == MAT_REUSE_MATRIX */ 3700 M = *newmat; 3701 PetscCall(MatGetLocalSize(M, &i, NULL)); 3702 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3703 PetscCall(MatZeroEntries(M)); 3704 /* 3705 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3706 rather than the slower MatSetValues(). 3707 */ 3708 M->was_assembled = PETSC_TRUE; 3709 M->assembled = PETSC_FALSE; 3710 } 3711 3712 /* (5) Set values of Msub to *newmat */ 3713 PetscCall(PetscMalloc1(count, &colsub)); 3714 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3715 3716 jj = aij->j; 3717 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3718 for (i = 0; i < m; i++) { 3719 row = rstart + i; 3720 nz = ii[i + 1] - ii[i]; 3721 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3722 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3723 jj += nz; 3724 aa += nz; 3725 } 3726 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3727 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3728 3729 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3730 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3731 3732 PetscCall(PetscFree(colsub)); 3733 3734 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3735 if (call == MAT_INITIAL_MATRIX) { 3736 *newmat = M; 3737 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3738 PetscCall(MatDestroy(&Msub)); 3739 3740 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3741 PetscCall(ISDestroy(&iscol_sub)); 3742 3743 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3744 PetscCall(ISDestroy(&iscmap)); 3745 3746 if (iscol_local) { 3747 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3748 PetscCall(ISDestroy(&iscol_local)); 3749 } 3750 } 3751 PetscFunctionReturn(PETSC_SUCCESS); 3752 } 3753 3754 /* 3755 Not great since it makes two copies of the submatrix, first an SeqAIJ 3756 in local and then by concatenating the local matrices the end result. 3757 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3758 3759 This requires a sequential iscol with all indices. 
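   MatCreateSubMatrix_MPIAIJ() builds that iscol with ISGetSeqIS_Private() (hence ISAllGather()) before calling this routine.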
3760 */ 3761 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3762 { 3763 PetscMPIInt rank, size; 3764 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3765 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3766 Mat M, Mreuse; 3767 MatScalar *aa, *vwork; 3768 MPI_Comm comm; 3769 Mat_SeqAIJ *aij; 3770 PetscBool colflag, allcolumns = PETSC_FALSE; 3771 3772 PetscFunctionBegin; 3773 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3774 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3775 PetscCallMPI(MPI_Comm_size(comm, &size)); 3776 3777 /* Check for special case: each processor gets entire matrix columns */ 3778 PetscCall(ISIdentity(iscol, &colflag)); 3779 PetscCall(ISGetLocalSize(iscol, &n)); 3780 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3781 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3782 3783 if (call == MAT_REUSE_MATRIX) { 3784 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3785 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3786 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3787 } else { 3788 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3789 } 3790 3791 /* 3792 m - number of local rows 3793 n - number of columns (same on all processors) 3794 rstart - first row in new global matrix generated 3795 */ 3796 PetscCall(MatGetSize(Mreuse, &m, &n)); 3797 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3798 if (call == MAT_INITIAL_MATRIX) { 3799 aij = (Mat_SeqAIJ *)Mreuse->data; 3800 ii = aij->i; 3801 jj = aij->j; 3802 3803 /* 3804 Determine the number of non-zeros in the diagonal and off-diagonal 3805 portions of the matrix in order to do correct preallocation 3806 */ 3807 3808 /* first get start and end of "diagonal" columns */ 3809 if (csize == PETSC_DECIDE) { 3810 PetscCall(ISGetSize(isrow, &mglobal)); 3811 if (mglobal == n) { /* square matrix */ 3812 nlocal = m; 3813 } else { 3814 nlocal = n / size + ((n % size) > rank); 3815 } 3816 } else { 3817 nlocal = csize; 3818 } 3819 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3820 rstart = rend - nlocal; 3821 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3822 3823 /* next, compute all the lengths */ 3824 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3825 olens = dlens + m; 3826 for (i = 0; i < m; i++) { 3827 jend = ii[i + 1] - ii[i]; 3828 olen = 0; 3829 dlen = 0; 3830 for (j = 0; j < jend; j++) { 3831 if (*jj < rstart || *jj >= rend) olen++; 3832 else dlen++; 3833 jj++; 3834 } 3835 olens[i] = olen; 3836 dlens[i] = dlen; 3837 } 3838 PetscCall(MatCreate(comm, &M)); 3839 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3840 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3841 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3842 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3843 PetscCall(PetscFree(dlens)); 3844 } else { 3845 PetscInt ml, nl; 3846 3847 M = *newmat; 3848 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3849 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3850 PetscCall(MatZeroEntries(M)); 3851 /* 3852 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3853 rather than the slower MatSetValues(). 3854 */ 3855 M->was_assembled = PETSC_TRUE; 3856 M->assembled = PETSC_FALSE; 3857 } 3858 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3859 aij = (Mat_SeqAIJ *)Mreuse->data; 3860 ii = aij->i; 3861 jj = aij->j; 3862 3863 /* trigger copy to CPU if needed */ 3864 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3865 for (i = 0; i < m; i++) { 3866 row = rstart + i; 3867 nz = ii[i + 1] - ii[i]; 3868 cwork = jj; 3869 jj = PetscSafePointerPlusOffset(jj, nz); 3870 vwork = aa; 3871 aa = PetscSafePointerPlusOffset(aa, nz); 3872 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3873 } 3874 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3875 3876 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3877 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3878 *newmat = M; 3879 3880 /* save submatrix used in processor for next request */ 3881 if (call == MAT_INITIAL_MATRIX) { 3882 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3883 PetscCall(MatDestroy(&Mreuse)); 3884 } 3885 PetscFunctionReturn(PETSC_SUCCESS); 3886 } 3887 3888 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3889 { 3890 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3891 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3892 const PetscInt *JJ; 3893 PetscBool nooffprocentries; 3894 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3895 3896 PetscFunctionBegin; 3897 PetscCall(PetscLayoutSetUp(B->rmap)); 3898 PetscCall(PetscLayoutSetUp(B->cmap)); 3899 m = B->rmap->n; 3900 cstart = B->cmap->rstart; 3901 cend = B->cmap->rend; 3902 rstart = B->rmap->rstart; 3903 irstart = Ii[0]; 3904 3905 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3906 3907 if (PetscDefined(USE_DEBUG)) { 3908 for (i = 0; i < m; i++) { 3909 nnz = Ii[i + 1] - Ii[i]; 3910 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3911 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3912 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3913 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3914 } 3915 } 3916 3917 for (i = 0; i < m; i++) { 3918 nnz = Ii[i + 1] - Ii[i]; 3919 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3920 nnz_max = PetscMax(nnz_max, nnz); 3921 d = 0; 3922 for (j = 0; j < nnz; j++) { 3923 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3924 } 3925 d_nnz[i] = d; 3926 o_nnz[i] = nnz - d; 3927 } 3928 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3929 PetscCall(PetscFree2(d_nnz, o_nnz)); 3930 3931 for (i = 0; i < m; i++) { 3932 ii = i + rstart; 3933 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3934 } 3935 nooffprocentries = B->nooffprocentries; 3936 B->nooffprocentries = PETSC_TRUE; 3937 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3938 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3939 B->nooffprocentries = nooffprocentries; 3940 3941 /* count number of entries below block diagonal */ 3942 PetscCall(PetscFree(Aij->ld)); 3943 PetscCall(PetscCalloc1(m, &ld)); 3944 Aij->ld = ld; 3945 for (i = 0; i < m; i++) { 3946 nnz = Ii[i + 1] - Ii[i]; 3947 j = 0; 3948 while (j < nnz && J[j] < cstart) j++; 3949 ld[i] = j; 3950 if (J) J += nnz; 3951 } 3952 3953 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3954 PetscFunctionReturn(PETSC_SUCCESS); 3955 } 3956 3957 /*@ 3958 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3959 (the default parallel PETSc format). 3960 3961 Collective 3962 3963 Input Parameters: 3964 + B - the matrix 3965 . i - the indices into `j` for the start of each local row (indices start with zero) 3966 . j - the column indices for each local row (indices start with zero) 3967 - v - optional values in the matrix 3968 3969 Level: developer 3970 3971 Notes: 3972 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3973 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3974 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3975 3976 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3977 3978 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3979 3980 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3981 3982 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3983 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3984 3985 The format which is used for the sparse matrix input, is equivalent to a 3986 row-major ordering.. i.e for the following matrix, the input data expected is 3987 as shown 3988 .vb 3989 1 0 0 3990 2 0 3 P0 3991 ------- 3992 4 5 6 P1 3993 3994 Process0 [P0] rows_owned=[0,1] 3995 i = {0,1,3} [size = nrow+1 = 2+1] 3996 j = {0,0,2} [size = 3] 3997 v = {1,2,3} [size = 3] 3998 3999 Process1 [P1] rows_owned=[2] 4000 i = {0,3} [size = nrow+1 = 1+1] 4001 j = {0,1,2} [size = 3] 4002 v = {4,5,6} [size = 3] 4003 .ve 4004 4005 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4006 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4007 @*/ 4008 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4009 { 4010 PetscFunctionBegin; 4011 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4012 PetscFunctionReturn(PETSC_SUCCESS); 4013 } 4014 4015 /*@ 4016 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4017 (the default parallel PETSc format). For good matrix assembly performance 4018 the user should preallocate the matrix storage by setting the parameters 4019 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4020 4021 Collective 4022 4023 Input Parameters: 4024 + B - the matrix 4025 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4026 (same value is used for all local rows) 4027 . d_nnz - array containing the number of nonzeros in the various rows of the 4028 DIAGONAL portion of the local submatrix (possibly different for each row) 4029 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4030 The size of this array is equal to the number of local rows, i.e., 'm'. 4031 For matrices that will be factored, you must leave room for (and set) 4032 the diagonal entry even if it is zero. 4033 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4034 submatrix (same value is used for all local rows). 4035 - o_nnz - array containing the number of nonzeros in the various rows of the 4036 OFF-DIAGONAL portion of the local submatrix (possibly different for 4037 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4038 structure. The size of this array is equal to the number 4039 of local rows, i.e., 'm'. 4040 4041 Example Usage: 4042 Consider the following 8x8 matrix with 34 non-zero values, that is 4043 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4044 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4045 as follows 4046 4047 .vb 4048 1 2 0 | 0 3 0 | 0 4 4049 Proc0 0 5 6 | 7 0 0 | 8 0 4050 9 0 10 | 11 0 0 | 12 0 4051 ------------------------------------- 4052 13 0 14 | 15 16 17 | 0 0 4053 Proc1 0 18 0 | 19 20 21 | 0 0 4054 0 0 0 | 22 23 0 | 24 0 4055 ------------------------------------- 4056 Proc2 25 26 27 | 0 0 28 | 29 0 4057 30 0 0 | 31 32 33 | 0 34 4058 .ve 4059 4060 This can be represented as a collection of submatrices as 4061 .vb 4062 A B C 4063 D E F 4064 G H I 4065 .ve 4066 4067 Where the submatrices A,B,C are owned by proc0, D,E,F are 4068 owned by proc1, G,H,I are owned by proc2. 4069 4070 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4071 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4072 The 'M','N' parameters are 8,8, and have the same values on all procs. 4073 4074 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4075 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4076 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4077 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4078 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4079 matrix, and [DF] as another `MATSEQAIJ` matrix. 4080 4081 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4082 allocated for every row of the local diagonal submatrix, and `o_nz` 4083 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4084 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over the local 4085 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4086 In this case, the values of `d_nz`, `o_nz` are 4087 .vb 4088 proc0 d_nz = 2, o_nz = 2 4089 proc1 d_nz = 3, o_nz = 2 4090 proc2 d_nz = 1, o_nz = 4 4091 .ve 4092 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4093 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4094 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4095 34 values. 4096 4097 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4098 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4099 In the above case the values for `d_nnz`, `o_nnz` are 4100 .vb 4101 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4102 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4103 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4104 .ve 4105 Here the space allocated is the sum of all the above values, i.e., 34, and 4106 hence pre-allocation is perfect. 4107 4108 Level: intermediate 4109 4110 Notes: 4111 If the *_nnz parameter is given then the *_nz parameter is ignored. 4112 4113 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4114 storage. The stored row and column indices begin with zero. 4115 See [Sparse Matrices](sec_matsparse) for details. 4116 4117 The parallel matrix is partitioned such that the first m0 rows belong to 4118 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4119 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4120 4121 The DIAGONAL portion of the local submatrix of a processor can be defined 4122 as the submatrix which is obtained by extracting the part corresponding to 4123 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4124 first row that belongs to the processor, r2 is the last row belonging to 4125 this processor, and c1-c2 is the range of indices of the local part of a 4126 vector suitable for applying the matrix to. This is an mxn matrix. In the 4127 common case of a square matrix, the row and column ranges are the same and 4128 the DIAGONAL part is also square. The remaining portion of the local 4129 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4130 4131 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4132 4133 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4134 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4135 You can also run with the option `-info` and look for messages with the string 4136 malloc in them to see if additional memory allocation was needed. 4137 4138 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4139 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4140 @*/ 4141 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4142 { 4143 PetscFunctionBegin; 4144 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4145 PetscValidType(B, 1); 4146 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4147 PetscFunctionReturn(PETSC_SUCCESS); 4148 } 4149 4150 /*@ 4151 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4152 CSR format. 4153 4154 Collective 4155 4156 Input Parameters: 4157 + comm - MPI communicator 4158 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4159 . n - This value should be the same as the local size used in creating the 4160 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 4161 calculated if `N` is given) For square matrices n is almost always `m`. 4162 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4163 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4164 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4165 . j - global column indices 4166 - a - optional matrix values 4167 4168 Output Parameter: 4169 . mat - the matrix 4170 4171 Level: intermediate 4172 4173 Notes: 4174 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4175 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4176 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4177 4178 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4179 4180 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`. 4181 4182 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4183 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4184 4185 The format which is used for the sparse matrix input is equivalent to a 4186 row-major ordering, i.e., for the following matrix, the input data expected is 4187 as shown 4188 .vb 4189 1 0 0 4190 2 0 3 P0 4191 ------- 4192 4 5 6 P1 4193 4194 Process0 [P0] rows_owned=[0,1] 4195 i = {0,1,3} [size = nrow+1 = 2+1] 4196 j = {0,0,2} [size = 3] 4197 v = {1,2,3} [size = 3] 4198 4199 Process1 [P1] rows_owned=[2] 4200 i = {0,3} [size = nrow+1 = 1+1] 4201 j = {0,1,2} [size = 3] 4202 v = {4,5,6} [size = 3] 4203 .ve 4204 4205 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4206 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4207 @*/ 4208 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4209 { 4210 PetscFunctionBegin; 4211 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4212 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4213 PetscCall(MatCreate(comm, mat)); 4214 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4215 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4216 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4217 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4218 PetscFunctionReturn(PETSC_SUCCESS); 4219 } 4220 4221 /*@ 4222 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4223 CSR format. Only the numerical values are updated; the other arrays must be identical to what was passed 4224 from `MatCreateMPIAIJWithArrays()`. 4225 4226 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4227 4228 Collective 4229 4230 Input Parameters: 4231 + mat - the matrix 4232 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4233 . n - This value should be the same as the local size used in creating the 4234 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4235 calculated if N is given) For square matrices n is almost always m. 4236 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4237 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4238 .
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4239 . J - column indices 4240 - v - matrix values 4241 4242 Level: deprecated 4243 4244 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4245 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4246 @*/ 4247 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4248 { 4249 PetscInt nnz, i; 4250 PetscBool nooffprocentries; 4251 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4252 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4253 PetscScalar *ad, *ao; 4254 PetscInt ldi, Iii, md; 4255 const PetscInt *Adi = Ad->i; 4256 PetscInt *ld = Aij->ld; 4257 4258 PetscFunctionBegin; 4259 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4260 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4261 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4262 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4263 4264 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4265 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4266 4267 for (i = 0; i < m; i++) { 4268 if (PetscDefined(USE_DEBUG)) { 4269 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4270 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4271 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4272 } 4273 } 4274 nnz = Ii[i + 1] - Ii[i]; 4275 Iii = Ii[i]; 4276 ldi = ld[i]; 4277 md = Adi[i + 1] - Adi[i]; 4278 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4279 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4280 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4281 ad += md; 4282 ao += nnz - md; 4283 } 4284 nooffprocentries = mat->nooffprocentries; 4285 mat->nooffprocentries = PETSC_TRUE; 4286 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4287 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4288 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4289 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4290 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4291 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4292 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4293 mat->nooffprocentries = nooffprocentries; 4294 PetscFunctionReturn(PETSC_SUCCESS); 4295 } 4296 4297 /*@ 4298 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4299 4300 Collective 4301 4302 Input Parameters: 4303 + mat - the matrix 4304 - v - matrix values, stored by row 4305 4306 Level: intermediate 4307 4308 Notes: 4309 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4310 4311 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4312 4313 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4314 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4315 @*/ 4316 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4317 { 4318 PetscInt nnz, i, m; 4319 PetscBool nooffprocentries; 4320 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4321 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4322 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4323 PetscScalar *ad, *ao; 4324 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4325 PetscInt ldi, Iii, md; 4326 PetscInt *ld = Aij->ld; 4327 4328 PetscFunctionBegin; 4329 m = mat->rmap->n; 4330 4331 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4332 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4333 Iii = 0; 4334 for (i = 0; i < m; i++) { 4335 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4336 ldi = ld[i]; 4337 md = Adi[i + 1] - Adi[i]; 4338 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4339 ad += md; 4340 if (ao) { 4341 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4342 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4343 ao += nnz - md; 4344 } 4345 Iii += nnz; 4346 } 4347 nooffprocentries = mat->nooffprocentries; 4348 mat->nooffprocentries = PETSC_TRUE; 4349 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4350 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4351 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4352 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4353 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4354 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4355 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4356 mat->nooffprocentries = nooffprocentries; 4357 PetscFunctionReturn(PETSC_SUCCESS); 4358 } 4359 4360 /*@ 4361 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4362 (the default parallel PETSc format). For good matrix assembly performance 4363 the user should preallocate the matrix storage by setting the parameters 4364 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4365 4366 Collective 4367 4368 Input Parameters: 4369 + comm - MPI communicator 4370 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4371 This value should be the same as the local size used in creating the 4372 y vector for the matrix-vector product y = Ax. 4373 . n - This value should be the same as the local size used in creating the 4374 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4375 calculated if N is given) For square matrices n is almost always m. 4376 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4377 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4378 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4379 (same value is used for all local rows) 4380 . d_nnz - array containing the number of nonzeros in the various rows of the 4381 DIAGONAL portion of the local submatrix (possibly different for each row) 4382 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4383 The size of this array is equal to the number of local rows, i.e 'm'. 4384 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4385 submatrix (same value is used for all local rows). 4386 - o_nnz - array containing the number of nonzeros in the various rows of the 4387 OFF-DIAGONAL portion of the local submatrix (possibly different for 4388 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4389 structure. The size of this array is equal to the number 4390 of local rows, i.e., `m`. 4391 4392 Output Parameter: 4393 . A - the matrix 4394 4395 Options Database Keys: 4396 + -mat_no_inode - Do not use inodes 4397 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4398 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4399 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4400 to be viewed as a matrix. Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one `MatMult()` call. 4401 4402 Level: intermediate 4403 4404 Notes: 4405 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4406 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4407 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4408 4409 If the *_nnz parameter is given then the *_nz parameter is ignored 4410 4411 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4412 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4413 storage requirements for this matrix. 4414 4415 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4416 processor then it must be used on all processors that share the object for 4417 that argument. 4418 4419 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4420 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4421 4422 The user MUST specify either the local or global matrix dimensions 4423 (possibly both). 4424 4425 The parallel matrix is partitioned across processors such that the 4426 first `m0` rows belong to process 0, the next `m1` rows belong to 4427 process 1, the next `m2` rows belong to process 2, etc., where 4428 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4429 values corresponding to an [m x N] submatrix. 4430 4431 The columns are logically partitioned with the n0 columns belonging 4432 to the 0th partition, the next n1 columns belonging to the next 4433 partition, etc., where n0,n1,n2... are the input parameter `n`. 4434 4435 The DIAGONAL portion of the local submatrix on any given processor 4436 is the submatrix corresponding to the rows and columns m,n 4437 corresponding to the given processor, i.e., the diagonal submatrix on 4438 process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], 4439 etc. The remaining portion of the local submatrix [m x (N-n)] 4440 constitutes the OFF-DIAGONAL portion. The example below better 4441 illustrates this concept. 4442 4443 For a square global matrix we define each processor's diagonal portion 4444 to be its local rows and the corresponding columns (a square submatrix); 4445 each processor's off-diagonal portion encompasses the remainder of the 4446 local matrix (a rectangular submatrix).
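   A quick way to see which entries land in which block is to query the ownership ranges that define the local diagonal block (a minimal sketch; not part of the creation call itself):
.vb
   PetscInt rstart, rend, cstart, cend;

   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));       /* local rows are [rstart, rend)                 */
   PetscCall(MatGetOwnershipRangeColumn(A, &cstart, &cend)); /* columns of the DIAGONAL block are [cstart, cend) */
   /* an entry (i, j) with rstart <= i < rend is stored on this rank; it belongs to the DIAGONAL
      SeqAIJ part when cstart <= j < cend and to the OFF-DIAGONAL part otherwise */
.ve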
4447 4448 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4449 4450 When calling this routine with a single process communicator, a matrix of 4451 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4452 type of communicator, use the construction mechanism 4453 .vb 4454 MatCreate(..., &A); 4455 MatSetType(A, MATMPIAIJ); 4456 MatSetSizes(A, m, n, M, N); 4457 MatMPIAIJSetPreallocation(A, ...); 4458 .ve 4459 4460 By default, this format uses inodes (identical nodes) when possible. 4461 We search for consecutive rows with the same nonzero structure, thereby 4462 reusing matrix information to achieve increased efficiency. 4463 4464 Example Usage: 4465 Consider the following 8x8 matrix with 34 non-zero values, that is 4466 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4467 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4468 as follows 4469 4470 .vb 4471 1 2 0 | 0 3 0 | 0 4 4472 Proc0 0 5 6 | 7 0 0 | 8 0 4473 9 0 10 | 11 0 0 | 12 0 4474 ------------------------------------- 4475 13 0 14 | 15 16 17 | 0 0 4476 Proc1 0 18 0 | 19 20 21 | 0 0 4477 0 0 0 | 22 23 0 | 24 0 4478 ------------------------------------- 4479 Proc2 25 26 27 | 0 0 28 | 29 0 4480 30 0 0 | 31 32 33 | 0 34 4481 .ve 4482 4483 This can be represented as a collection of submatrices as 4484 4485 .vb 4486 A B C 4487 D E F 4488 G H I 4489 .ve 4490 4491 Here the submatrices A,B,C are owned by proc0, D,E,F are 4492 owned by proc1, and G,H,I are owned by proc2. 4493 4494 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4495 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4496 The 'M','N' parameters are 8,8, and have the same values on all procs. 4497 4498 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4499 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4500 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4501 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4502 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4503 matrix, and [DF] as another `MATSEQAIJ` matrix. 4504 4505 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4506 allocated for every row of the local diagonal submatrix, and `o_nz` 4507 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4508 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per 4509 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4510 In this case, the values of `d_nz`,`o_nz` are 4511 .vb 4512 proc0 d_nz = 2, o_nz = 2 4513 proc1 d_nz = 3, o_nz = 2 4514 proc2 d_nz = 1, o_nz = 4 4515 .ve 4516 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4517 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4518 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4519 34 values. 4520 4521 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4522 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4523 In the above case the values for d_nnz,o_nnz are 4524 .vb 4525 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4526 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4527 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4528 .ve 4529 Here the space allocated is the sum of all the above values, i.e., 34, and 4530 hence the preallocation is perfect.
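   Putting the numbers above together, the call on proc0 for the 8x8 example could look like the following sketch (each rank passes its own `m`, `n`, `d_nnz`, and `o_nnz` from the table; values are then set with `MatSetValues()` followed by assembly):
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2, 2, 2}; /* proc0 row lengths of the DIAGONAL block [A]      */
   PetscInt o_nnz[3] = {2, 2, 2}; /* proc0 row lengths of the OFF-DIAGONAL block [BC] */

   PetscCall(MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A));
.ve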
4531 4532 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4533 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4534 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4535 @*/ 4536 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4537 { 4538 PetscMPIInt size; 4539 4540 PetscFunctionBegin; 4541 PetscCall(MatCreate(comm, A)); 4542 PetscCall(MatSetSizes(*A, m, n, M, N)); 4543 PetscCallMPI(MPI_Comm_size(comm, &size)); 4544 if (size > 1) { 4545 PetscCall(MatSetType(*A, MATMPIAIJ)); 4546 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4547 } else { 4548 PetscCall(MatSetType(*A, MATSEQAIJ)); 4549 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4550 } 4551 PetscFunctionReturn(PETSC_SUCCESS); 4552 } 4553 4554 /*MC 4555 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4556 4557 Synopsis: 4558 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4559 4560 Not Collective 4561 4562 Input Parameter: 4563 . A - the `MATMPIAIJ` matrix 4564 4565 Output Parameters: 4566 + Ad - the diagonal portion of the matrix 4567 . Ao - the off-diagonal portion of the matrix 4568 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4569 - ierr - error code 4570 4571 Level: advanced 4572 4573 Note: 4574 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4575 4576 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4577 M*/ 4578 4579 /*MC 4580 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4581 4582 Synopsis: 4583 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4584 4585 Not Collective 4586 4587 Input Parameters: 4588 + A - the `MATMPIAIJ` matrix 4589 . Ad - the diagonal portion of the matrix 4590 . Ao - the off-diagonal portion of the matrix 4591 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4592 - ierr - error code 4593 4594 Level: advanced 4595 4596 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4597 M*/ 4598 4599 /*@C 4600 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4601 4602 Not Collective 4603 4604 Input Parameter: 4605 . A - The `MATMPIAIJ` matrix 4606 4607 Output Parameters: 4608 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4609 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4610 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4611 4612 Level: intermediate 4613 4614 Note: 4615 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4616 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4617 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4618 local column numbers to global column numbers in the original matrix. 4619 4620 Fortran Notes: 4621 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4622 4623 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4624 @*/ 4625 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4626 { 4627 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4628 PetscBool flg; 4629 4630 PetscFunctionBegin; 4631 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4632 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4633 if (Ad) *Ad = a->A; 4634 if (Ao) *Ao = a->B; 4635 if (colmap) *colmap = a->garray; 4636 PetscFunctionReturn(PETSC_SUCCESS); 4637 } 4638 4639 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4640 { 4641 PetscInt m, N, i, rstart, nnz, Ii; 4642 PetscInt *indx; 4643 PetscScalar *values; 4644 MatType rootType; 4645 4646 PetscFunctionBegin; 4647 PetscCall(MatGetSize(inmat, &m, &N)); 4648 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4649 PetscInt *dnz, *onz, sum, bs, cbs; 4650 4651 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4652 /* Check sum(n) = N */ 4653 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4654 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4655 4656 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4657 rstart -= m; 4658 4659 MatPreallocateBegin(comm, m, n, dnz, onz); 4660 for (i = 0; i < m; i++) { 4661 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4662 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4663 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4664 } 4665 4666 PetscCall(MatCreate(comm, outmat)); 4667 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4668 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4669 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4670 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4671 PetscCall(MatSetType(*outmat, rootType)); 4672 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4673 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4674 MatPreallocateEnd(dnz, onz); 4675 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4676 } 4677 4678 /* numeric phase */ 4679 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4680 for (i = 0; i < m; i++) { 4681 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4682 Ii = i + rstart; 4683 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4684 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4685 } 4686 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4687 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4688 PetscFunctionReturn(PETSC_SUCCESS); 4689 } 4690 4691 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4692 { 4693 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4694 4695 PetscFunctionBegin; 4696 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4697 PetscCall(PetscFree(merge->id_r)); 4698 PetscCall(PetscFree(merge->len_s)); 4699 
PetscCall(PetscFree(merge->len_r)); 4700 PetscCall(PetscFree(merge->bi)); 4701 PetscCall(PetscFree(merge->bj)); 4702 PetscCall(PetscFree(merge->buf_ri[0])); 4703 PetscCall(PetscFree(merge->buf_ri)); 4704 PetscCall(PetscFree(merge->buf_rj[0])); 4705 PetscCall(PetscFree(merge->buf_rj)); 4706 PetscCall(PetscFree(merge->coi)); 4707 PetscCall(PetscFree(merge->coj)); 4708 PetscCall(PetscFree(merge->owners_co)); 4709 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4710 PetscCall(PetscFree(merge)); 4711 PetscFunctionReturn(PETSC_SUCCESS); 4712 } 4713 4714 #include <../src/mat/utils/freespace.h> 4715 #include <petscbt.h> 4716 4717 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4718 { 4719 MPI_Comm comm; 4720 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4721 PetscMPIInt size, rank, taga, *len_s; 4722 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4723 PetscMPIInt proc, k; 4724 PetscInt **buf_ri, **buf_rj; 4725 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4726 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4727 MPI_Request *s_waits, *r_waits; 4728 MPI_Status *status; 4729 const MatScalar *aa, *a_a; 4730 MatScalar **abuf_r, *ba_i; 4731 Mat_Merge_SeqsToMPI *merge; 4732 PetscContainer container; 4733 4734 PetscFunctionBegin; 4735 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4736 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4737 4738 PetscCallMPI(MPI_Comm_size(comm, &size)); 4739 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4740 4741 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4742 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4743 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4744 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4745 aa = a_a; 4746 4747 bi = merge->bi; 4748 bj = merge->bj; 4749 buf_ri = merge->buf_ri; 4750 buf_rj = merge->buf_rj; 4751 4752 PetscCall(PetscMalloc1(size, &status)); 4753 owners = merge->rowmap->range; 4754 len_s = merge->len_s; 4755 4756 /* send and recv matrix values */ 4757 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4758 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4759 4760 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4761 for (proc = 0, k = 0; proc < size; proc++) { 4762 if (!len_s[proc]) continue; 4763 i = owners[proc]; 4764 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4765 k++; 4766 } 4767 4768 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4769 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4770 PetscCall(PetscFree(status)); 4771 4772 PetscCall(PetscFree(s_waits)); 4773 PetscCall(PetscFree(r_waits)); 4774 4775 /* insert mat values of mpimat */ 4776 PetscCall(PetscMalloc1(N, &ba_i)); 4777 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4778 4779 for (k = 0; k < merge->nrecv; k++) { 4780 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4781 nrows = *buf_ri_k[k]; 4782 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4783 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4784 } 4785 4786 /* set values of ba */ 4787 m = merge->rowmap->n; 4788 for (i = 0; i < m; i++) { 4789 arow = owners[rank] + 
i; 4790 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4791 bnzi = bi[i + 1] - bi[i]; 4792 PetscCall(PetscArrayzero(ba_i, bnzi)); 4793 4794 /* add local non-zero vals of this proc's seqmat into ba */ 4795 anzi = ai[arow + 1] - ai[arow]; 4796 aj = a->j + ai[arow]; 4797 aa = a_a + ai[arow]; 4798 nextaj = 0; 4799 for (j = 0; nextaj < anzi; j++) { 4800 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4801 ba_i[j] += aa[nextaj++]; 4802 } 4803 } 4804 4805 /* add received vals into ba */ 4806 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4807 /* i-th row */ 4808 if (i == *nextrow[k]) { 4809 anzi = *(nextai[k] + 1) - *nextai[k]; 4810 aj = buf_rj[k] + *nextai[k]; 4811 aa = abuf_r[k] + *nextai[k]; 4812 nextaj = 0; 4813 for (j = 0; nextaj < anzi; j++) { 4814 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4815 ba_i[j] += aa[nextaj++]; 4816 } 4817 } 4818 nextrow[k]++; 4819 nextai[k]++; 4820 } 4821 } 4822 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4823 } 4824 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4825 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4826 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4827 4828 PetscCall(PetscFree(abuf_r[0])); 4829 PetscCall(PetscFree(abuf_r)); 4830 PetscCall(PetscFree(ba_i)); 4831 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4832 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4833 PetscFunctionReturn(PETSC_SUCCESS); 4834 } 4835 4836 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4837 { 4838 Mat B_mpi; 4839 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4840 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4841 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4842 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4843 PetscInt len, *dnz, *onz, bs, cbs; 4844 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4845 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4846 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4847 MPI_Status *status; 4848 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4849 PetscBT lnkbt; 4850 Mat_Merge_SeqsToMPI *merge; 4851 PetscContainer container; 4852 4853 PetscFunctionBegin; 4854 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4855 4856 /* make sure it is a PETSc comm */ 4857 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4858 PetscCallMPI(MPI_Comm_size(comm, &size)); 4859 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4860 4861 PetscCall(PetscNew(&merge)); 4862 PetscCall(PetscMalloc1(size, &status)); 4863 4864 /* determine row ownership */ 4865 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4866 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4867 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4868 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4869 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4870 PetscCall(PetscMalloc1(size, &len_si)); 4871 PetscCall(PetscMalloc1(size, &merge->len_s)); 4872 4873 m = merge->rowmap->n; 4874 owners = merge->rowmap->range; 4875 4876 /* determine the number of messages to send, their lengths */ 4877 len_s = merge->len_s; 4878 4879 len = 0; /* length of buf_si[] */ 4880 merge->nsend = 0; 4881 for (PetscMPIInt proc = 0; proc < size; proc++) { 4882 len_si[proc] = 0; 4883 if (proc == rank) { 4884 len_s[proc] = 0; 4885 } else { 4886 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4887 
PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4888 } 4889 if (len_s[proc]) { 4890 merge->nsend++; 4891 nrows = 0; 4892 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4893 if (ai[i + 1] > ai[i]) nrows++; 4894 } 4895 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4896 len += len_si[proc]; 4897 } 4898 } 4899 4900 /* determine the number and length of messages to receive for ij-structure */ 4901 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4902 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4903 4904 /* post the Irecv of j-structure */ 4905 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4906 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4907 4908 /* post the Isend of j-structure */ 4909 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4910 4911 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4912 if (!len_s[proc]) continue; 4913 i = owners[proc]; 4914 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4915 k++; 4916 } 4917 4918 /* receives and sends of j-structure are complete */ 4919 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4920 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4921 4922 /* send and recv i-structure */ 4923 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4924 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4925 4926 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4927 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4928 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4929 if (!len_s[proc]) continue; 4930 /* form outgoing message for i-structure: 4931 buf_si[0]: nrows to be sent 4932 [1:nrows]: row index (global) 4933 [nrows+1:2*nrows+1]: i-structure index 4934 */ 4935 nrows = len_si[proc] / 2 - 1; 4936 buf_si_i = buf_si + nrows + 1; 4937 buf_si[0] = nrows; 4938 buf_si_i[0] = 0; 4939 nrows = 0; 4940 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4941 anzi = ai[i + 1] - ai[i]; 4942 if (anzi) { 4943 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4944 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4945 nrows++; 4946 } 4947 } 4948 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4949 k++; 4950 buf_si += len_si[proc]; 4951 } 4952 4953 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4954 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4955 4956 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4957 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4958 4959 PetscCall(PetscFree(len_si)); 4960 PetscCall(PetscFree(len_ri)); 4961 PetscCall(PetscFree(rj_waits)); 4962 PetscCall(PetscFree2(si_waits, sj_waits)); 4963 PetscCall(PetscFree(ri_waits)); 4964 PetscCall(PetscFree(buf_s)); 4965 PetscCall(PetscFree(status)); 4966 4967 /* compute a local seq matrix in each processor */ 4968 /* allocate bi array and free space for accumulating nonzero column info */ 4969 PetscCall(PetscMalloc1(m + 1, &bi)); 4970 bi[0] = 0; 4971 4972 /* create and initialize a linked list */ 4973 
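  /* lnk is a sorted linked list over the global columns [0, N), backed by the bit table lnkbt;
     PetscLLAddSorted() below merges the (sorted) local and received column indices of each row into it,
     and PetscLLClean() drains the merged indices into the free-space buffer and resets the list for the next row */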
nlnk = N + 1; 4974 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4975 4976 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4977 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4978 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4979 4980 current_space = free_space; 4981 4982 /* determine symbolic info for each local row */ 4983 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4984 4985 for (k = 0; k < merge->nrecv; k++) { 4986 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4987 nrows = *buf_ri_k[k]; 4988 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4989 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4990 } 4991 4992 MatPreallocateBegin(comm, m, n, dnz, onz); 4993 len = 0; 4994 for (i = 0; i < m; i++) { 4995 bnzi = 0; 4996 /* add local non-zero cols of this proc's seqmat into lnk */ 4997 arow = owners[rank] + i; 4998 anzi = ai[arow + 1] - ai[arow]; 4999 aj = a->j + ai[arow]; 5000 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5001 bnzi += nlnk; 5002 /* add received col data into lnk */ 5003 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5004 if (i == *nextrow[k]) { /* i-th row */ 5005 anzi = *(nextai[k] + 1) - *nextai[k]; 5006 aj = buf_rj[k] + *nextai[k]; 5007 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5008 bnzi += nlnk; 5009 nextrow[k]++; 5010 nextai[k]++; 5011 } 5012 } 5013 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5014 5015 /* if free space is not available, make more free space */ 5016 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5017 /* copy data into free space, then initialize lnk */ 5018 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5019 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5020 5021 current_space->array += bnzi; 5022 current_space->local_used += bnzi; 5023 current_space->local_remaining -= bnzi; 5024 5025 bi[i + 1] = bi[i] + bnzi; 5026 } 5027 5028 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5029 5030 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5031 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5032 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5033 5034 /* create symbolic parallel matrix B_mpi */ 5035 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5036 PetscCall(MatCreate(comm, &B_mpi)); 5037 if (n == PETSC_DECIDE) { 5038 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5039 } else { 5040 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5041 } 5042 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5043 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5044 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5045 MatPreallocateEnd(dnz, onz); 5046 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5047 5048 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5049 B_mpi->assembled = PETSC_FALSE; 5050 merge->bi = bi; 5051 merge->bj = bj; 5052 merge->buf_ri = buf_ri; 5053 merge->buf_rj = buf_rj; 5054 merge->coi = NULL; 5055 merge->coj = NULL; 5056 merge->owners_co = NULL; 5057 5058 PetscCall(PetscCommDestroy(&comm)); 5059 5060 /* attach the supporting struct to B_mpi for reuse */ 5061 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5062
PetscCall(PetscContainerSetPointer(container, merge)); 5063 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5064 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5065 PetscCall(PetscContainerDestroy(&container)); 5066 *mpimat = B_mpi; 5067 5068 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5069 PetscFunctionReturn(PETSC_SUCCESS); 5070 } 5071 5072 /*@ 5073 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5074 matrices from each processor 5075 5076 Collective 5077 5078 Input Parameters: 5079 + comm - the communicators the parallel matrix will live on 5080 . seqmat - the input sequential matrices 5081 . m - number of local rows (or `PETSC_DECIDE`) 5082 . n - number of local columns (or `PETSC_DECIDE`) 5083 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5084 5085 Output Parameter: 5086 . mpimat - the parallel matrix generated 5087 5088 Level: advanced 5089 5090 Note: 5091 The dimensions of the sequential matrix in each processor MUST be the same. 5092 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5093 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5094 5095 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5096 @*/ 5097 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5098 { 5099 PetscMPIInt size; 5100 5101 PetscFunctionBegin; 5102 PetscCallMPI(MPI_Comm_size(comm, &size)); 5103 if (size == 1) { 5104 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5105 if (scall == MAT_INITIAL_MATRIX) { 5106 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5107 } else { 5108 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5109 } 5110 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5111 PetscFunctionReturn(PETSC_SUCCESS); 5112 } 5113 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5114 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5115 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5116 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5117 PetscFunctionReturn(PETSC_SUCCESS); 5118 } 5119 5120 /*@ 5121 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5122 5123 Not Collective 5124 5125 Input Parameter: 5126 . A - the matrix 5127 5128 Output Parameter: 5129 . A_loc - the local sequential matrix generated 5130 5131 Level: developer 5132 5133 Notes: 5134 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5135 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5136 `n` is the global column count obtained with `MatGetSize()` 5137 5138 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5139 5140 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
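   A typical call sequence is sketched below (illustrative only):
.vb
   Mat A_loc;

   PetscCall(MatAIJGetLocalMat(A, &A_loc));
   /* ... use A_loc as a sequential MATSEQAIJ matrix ... */
   PetscCall(MatDestroy(&A_loc));
.ve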
5141 5142 Destroy the matrix with `MatDestroy()` 5143 5144 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5145 @*/ 5146 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5147 { 5148 PetscBool mpi; 5149 5150 PetscFunctionBegin; 5151 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5152 if (mpi) { 5153 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5154 } else { 5155 *A_loc = A; 5156 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5157 } 5158 PetscFunctionReturn(PETSC_SUCCESS); 5159 } 5160 5161 /*@ 5162 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5163 5164 Not Collective 5165 5166 Input Parameters: 5167 + A - the matrix 5168 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5169 5170 Output Parameter: 5171 . A_loc - the local sequential matrix generated 5172 5173 Level: developer 5174 5175 Notes: 5176 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5177 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5178 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5179 5180 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5181 5182 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5183 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5184 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5185 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
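   A sketch of the initial-then-reuse pattern described above (assuming the nonzero pattern of `A` is unchanged between calls):
.vb
   Mat A_loc;

   PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc)); /* build the merged local matrix       */
   /* ... the values of A change, but its nonzero pattern stays the same ... */
   PetscCall(MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc));   /* refresh the values of A_loc in place */
   PetscCall(MatDestroy(&A_loc));
.ve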
5186 5187 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5188 @*/ 5189 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5190 { 5191 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5192 Mat_SeqAIJ *mat, *a, *b; 5193 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5194 const PetscScalar *aa, *ba, *aav, *bav; 5195 PetscScalar *ca, *cam; 5196 PetscMPIInt size; 5197 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5198 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5199 PetscBool match; 5200 5201 PetscFunctionBegin; 5202 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5203 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5204 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5205 if (size == 1) { 5206 if (scall == MAT_INITIAL_MATRIX) { 5207 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5208 *A_loc = mpimat->A; 5209 } else if (scall == MAT_REUSE_MATRIX) { 5210 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5211 } 5212 PetscFunctionReturn(PETSC_SUCCESS); 5213 } 5214 5215 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5216 a = (Mat_SeqAIJ *)mpimat->A->data; 5217 b = (Mat_SeqAIJ *)mpimat->B->data; 5218 ai = a->i; 5219 aj = a->j; 5220 bi = b->i; 5221 bj = b->j; 5222 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5223 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5224 aa = aav; 5225 ba = bav; 5226 if (scall == MAT_INITIAL_MATRIX) { 5227 PetscCall(PetscMalloc1(1 + am, &ci)); 5228 ci[0] = 0; 5229 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5230 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5231 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5232 k = 0; 5233 for (i = 0; i < am; i++) { 5234 ncols_o = bi[i + 1] - bi[i]; 5235 ncols_d = ai[i + 1] - ai[i]; 5236 /* off-diagonal portion of A */ 5237 for (jo = 0; jo < ncols_o; jo++) { 5238 col = cmap[*bj]; 5239 if (col >= cstart) break; 5240 cj[k] = col; 5241 bj++; 5242 ca[k++] = *ba++; 5243 } 5244 /* diagonal portion of A */ 5245 for (j = 0; j < ncols_d; j++) { 5246 cj[k] = cstart + *aj++; 5247 ca[k++] = *aa++; 5248 } 5249 /* off-diagonal portion of A */ 5250 for (j = jo; j < ncols_o; j++) { 5251 cj[k] = cmap[*bj++]; 5252 ca[k++] = *ba++; 5253 } 5254 } 5255 /* put together the new matrix */ 5256 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5257 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5258 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5259 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5260 mat->free_a = PETSC_TRUE; 5261 mat->free_ij = PETSC_TRUE; 5262 mat->nonew = 0; 5263 } else if (scall == MAT_REUSE_MATRIX) { 5264 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5265 ci = mat->i; 5266 cj = mat->j; 5267 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5268 for (i = 0; i < am; i++) { 5269 /* off-diagonal portion of A */ 5270 ncols_o = bi[i + 1] - bi[i]; 5271 for (jo = 0; jo < ncols_o; jo++) { 5272 col = cmap[*bj]; 5273 if (col >= cstart) break; 5274 *cam++ = *ba++; 5275 bj++; 5276 } 5277 /* diagonal portion of A */ 5278 ncols_d = ai[i + 1] - ai[i]; 5279 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5280 /* off-diagonal portion of A */ 5281 for (j = jo; j < ncols_o; j++) { 5282 *cam++ = *ba++; 5283 bj++; 5284 } 5285 } 5286 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5287 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5288 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5289 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5290 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5291 PetscFunctionReturn(PETSC_SUCCESS); 5292 } 5293 5294 /*@ 5295 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5296 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5297 5298 Not Collective 5299 5300 Input Parameters: 5301 + A - the matrix 5302 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5303 5304 Output Parameters: 5305 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5306 - A_loc - the local sequential matrix generated 5307 5308 Level: developer 5309 5310 Note: 5311 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5312 part, then those associated with the off-diagonal part (in its local ordering) 5313 5314 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5315 @*/ 5316 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5317 { 5318 Mat Ao, Ad; 5319 const PetscInt *cmap; 5320 PetscMPIInt size; 5321 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5322 5323 PetscFunctionBegin; 5324 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5325 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5326 if (size == 1) { 5327 if (scall == MAT_INITIAL_MATRIX) { 5328 PetscCall(PetscObjectReference((PetscObject)Ad)); 5329 *A_loc = Ad; 5330 } else if (scall == MAT_REUSE_MATRIX) { 5331 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5332 } 5333 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5334 PetscFunctionReturn(PETSC_SUCCESS); 5335 } 5336 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5337 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5338 if (f) { 5339 PetscCall((*f)(A, scall, glob, A_loc)); 5340 } else { 5341 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5342 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5343 Mat_SeqAIJ *c; 5344 PetscInt *ai = a->i, *aj = a->j; 5345 PetscInt *bi = b->i, *bj = b->j; 5346 PetscInt *ci, *cj; 5347 const PetscScalar *aa, *ba; 5348 PetscScalar *ca; 5349 PetscInt i, j, am, dn, on; 5350 5351 
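    /* default path: concatenate each local row of Ad and Ao; columns of Ad keep their local
       indices in [0, dn), while columns of Ao are shifted by dn so they occupy [dn, dn + on) */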
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5352 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5353 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5354 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5355 if (scall == MAT_INITIAL_MATRIX) { 5356 PetscInt k; 5357 PetscCall(PetscMalloc1(1 + am, &ci)); 5358 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5359 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5360 ci[0] = 0; 5361 for (i = 0, k = 0; i < am; i++) { 5362 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5363 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5364 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5365 /* diagonal portion of A */ 5366 for (j = 0; j < ncols_d; j++, k++) { 5367 cj[k] = *aj++; 5368 ca[k] = *aa++; 5369 } 5370 /* off-diagonal portion of A */ 5371 for (j = 0; j < ncols_o; j++, k++) { 5372 cj[k] = dn + *bj++; 5373 ca[k] = *ba++; 5374 } 5375 } 5376 /* put together the new matrix */ 5377 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5378 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5379 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5380 c = (Mat_SeqAIJ *)(*A_loc)->data; 5381 c->free_a = PETSC_TRUE; 5382 c->free_ij = PETSC_TRUE; 5383 c->nonew = 0; 5384 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5385 } else if (scall == MAT_REUSE_MATRIX) { 5386 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5387 for (i = 0; i < am; i++) { 5388 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5389 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5390 /* diagonal portion of A */ 5391 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5392 /* off-diagonal portion of A */ 5393 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5394 } 5395 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5396 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5397 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5398 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5399 if (glob) { 5400 PetscInt cst, *gidx; 5401 5402 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5403 PetscCall(PetscMalloc1(dn + on, &gidx)); 5404 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5405 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5406 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5407 } 5408 } 5409 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5410 PetscFunctionReturn(PETSC_SUCCESS); 5411 } 5412 5413 /*@C 5414 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5415 5416 Not Collective 5417 5418 Input Parameters: 5419 + A - the matrix 5420 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5421 . row - index set of rows to extract (or `NULL`) 5422 - col - index set of columns to extract (or `NULL`) 5423 5424 Output Parameter: 5425 . 
A_loc - the local sequential matrix generated 5426 5427 Level: developer 5428 5429 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5430 @*/ 5431 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5432 { 5433 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5434 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5435 IS isrowa, iscola; 5436 Mat *aloc; 5437 PetscBool match; 5438 5439 PetscFunctionBegin; 5440 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5441 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5442 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5443 if (!row) { 5444 start = A->rmap->rstart; 5445 end = A->rmap->rend; 5446 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5447 } else { 5448 isrowa = *row; 5449 } 5450 if (!col) { 5451 start = A->cmap->rstart; 5452 cmap = a->garray; 5453 nzA = a->A->cmap->n; 5454 nzB = a->B->cmap->n; 5455 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5456 ncols = 0; 5457 for (i = 0; i < nzB; i++) { 5458 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5459 else break; 5460 } 5461 imark = i; 5462 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5463 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5464 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5465 } else { 5466 iscola = *col; 5467 } 5468 if (scall != MAT_INITIAL_MATRIX) { 5469 PetscCall(PetscMalloc1(1, &aloc)); 5470 aloc[0] = *A_loc; 5471 } 5472 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5473 if (!col) { /* attach global id of condensed columns */ 5474 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5475 } 5476 *A_loc = aloc[0]; 5477 PetscCall(PetscFree(aloc)); 5478 if (!row) PetscCall(ISDestroy(&isrowa)); 5479 if (!col) PetscCall(ISDestroy(&iscola)); 5480 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5481 PetscFunctionReturn(PETSC_SUCCESS); 5482 } 5483 5484 /* 5485 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5486 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5487 * on a global size. 
5488 * */ 5489 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5490 { 5491 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5492 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5493 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5494 PetscMPIInt owner; 5495 PetscSFNode *iremote, *oiremote; 5496 const PetscInt *lrowindices; 5497 PetscSF sf, osf; 5498 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5499 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5500 MPI_Comm comm; 5501 ISLocalToGlobalMapping mapping; 5502 const PetscScalar *pd_a, *po_a; 5503 5504 PetscFunctionBegin; 5505 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5506 /* plocalsize is the number of roots 5507 * nrows is the number of leaves 5508 * */ 5509 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5510 PetscCall(ISGetLocalSize(rows, &nrows)); 5511 PetscCall(PetscCalloc1(nrows, &iremote)); 5512 PetscCall(ISGetIndices(rows, &lrowindices)); 5513 for (i = 0; i < nrows; i++) { 5514 /* Find a remote index and an owner for a row 5515 * The row could be local or remote 5516 * */ 5517 owner = 0; 5518 lidx = 0; 5519 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5520 iremote[i].index = lidx; 5521 iremote[i].rank = owner; 5522 } 5523 /* Create SF to communicate how many nonzero columns for each row */ 5524 PetscCall(PetscSFCreate(comm, &sf)); 5525 /* SF will figure out the number of nonzero columns for each row, and their 5526 * offsets 5527 * */ 5528 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5529 PetscCall(PetscSFSetFromOptions(sf)); 5530 PetscCall(PetscSFSetUp(sf)); 5531 5532 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5533 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5534 PetscCall(PetscCalloc1(nrows, &pnnz)); 5535 roffsets[0] = 0; 5536 roffsets[1] = 0; 5537 for (i = 0; i < plocalsize; i++) { 5538 /* diagonal */ 5539 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5540 /* off-diagonal */ 5541 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5542 /* compute offsets so that we relative location for each row */ 5543 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5544 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5545 } 5546 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5547 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5548 /* 'r' means root, and 'l' means leaf */ 5549 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5550 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5551 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5552 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5553 PetscCall(PetscSFDestroy(&sf)); 5554 PetscCall(PetscFree(roffsets)); 5555 PetscCall(PetscFree(nrcols)); 5556 dntotalcols = 0; 5557 ontotalcols = 0; 5558 ncol = 0; 5559 for (i = 0; i < nrows; i++) { 5560 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5561 ncol = PetscMax(pnnz[i], ncol); 5562 /* diagonal */ 5563 dntotalcols += nlcols[i * 2 + 0]; 5564 /* off-diagonal */ 5565 ontotalcols += nlcols[i * 2 + 1]; 5566 } 5567 /* We do not need to figure the right number of columns 5568 * since all the calculations will be done by going through the raw data 5569 * */ 5570 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5571 PetscCall(MatSetUp(*P_oth)); 5572 
PetscCall(PetscFree(pnnz)); 5573 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5574 /* diagonal */ 5575 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5576 /* off-diagonal */ 5577 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5578 /* diagonal */ 5579 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5580 /* off-diagonal */ 5581 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5582 dntotalcols = 0; 5583 ontotalcols = 0; 5584 ntotalcols = 0; 5585 for (i = 0; i < nrows; i++) { 5586 owner = 0; 5587 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5588 /* Set iremote for diag matrix */ 5589 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5590 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5591 iremote[dntotalcols].rank = owner; 5592 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5593 ilocal[dntotalcols++] = ntotalcols++; 5594 } 5595 /* off-diagonal */ 5596 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5597 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5598 oiremote[ontotalcols].rank = owner; 5599 oilocal[ontotalcols++] = ntotalcols++; 5600 } 5601 } 5602 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5603 PetscCall(PetscFree(loffsets)); 5604 PetscCall(PetscFree(nlcols)); 5605 PetscCall(PetscSFCreate(comm, &sf)); 5606 /* P serves as roots and P_oth is leaves 5607 * Diag matrix 5608 * */ 5609 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5610 PetscCall(PetscSFSetFromOptions(sf)); 5611 PetscCall(PetscSFSetUp(sf)); 5612 5613 PetscCall(PetscSFCreate(comm, &osf)); 5614 /* off-diagonal */ 5615 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5616 PetscCall(PetscSFSetFromOptions(osf)); 5617 PetscCall(PetscSFSetUp(osf)); 5618 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5619 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5620 /* operate on the matrix internal data to save memory */ 5621 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5622 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5623 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5624 /* Convert to global indices for diag matrix */ 5625 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5626 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5627 /* We want P_oth store global indices */ 5628 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5629 /* Use memory scalable approach */ 5630 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5631 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5632 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5633 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5634 /* Convert back to local indices */ 5635 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5636 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5637 nout = 0; 5638 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5639 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5640 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5641 /* Exchange values */ 5642 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5643 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5644 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5645 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5646 /* Stop PETSc from shrinking memory */ 5647 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5648 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5649 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5650 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5651 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5652 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5653 PetscCall(PetscSFDestroy(&sf)); 5654 PetscCall(PetscSFDestroy(&osf)); 5655 PetscFunctionReturn(PETSC_SUCCESS); 5656 } 5657 5658 /* 5659 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5660 * This supports MPIAIJ and MAIJ 5661 * */ 5662 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5663 { 5664 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5665 Mat_SeqAIJ *p_oth; 5666 IS rows, map; 5667 PetscHMapI hamp; 5668 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5669 MPI_Comm comm; 5670 PetscSF sf, osf; 5671 PetscBool has; 5672 5673 PetscFunctionBegin; 5674 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5675 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5676 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5677 * and then create a submatrix (that often is an overlapping matrix) 5678 * */ 5679 if (reuse == MAT_INITIAL_MATRIX) { 5680 /* Use a hash table to figure out unique keys */ 5681 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5682 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5683 count = 0; 5684 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5685 for (i = 0; i < a->B->cmap->n; i++) { 5686 key = a->garray[i] / dof; 5687 PetscCall(PetscHMapIHas(hamp, key, &has)); 5688 if (!has) { 5689 mapping[i] = count; 5690 PetscCall(PetscHMapISet(hamp, key, count++)); 5691 } else { 5692 /* Current 'i' has the same value the previous step */ 5693 mapping[i] = count - 1; 5694 } 5695 } 5696 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5697 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5698 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5699 PetscCall(PetscCalloc1(htsize, &rowindices)); 5700 off = 0; 5701 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5702 PetscCall(PetscHMapIDestroy(&hamp)); 5703 PetscCall(PetscSortInt(htsize, rowindices)); 5704 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5705 /* In case, the matrix was already created but users want to recreate the matrix */ 5706 PetscCall(MatDestroy(P_oth)); 5707 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5708 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5709 PetscCall(ISDestroy(&map)); 5710 PetscCall(ISDestroy(&rows)); 5711 } else if (reuse == MAT_REUSE_MATRIX) { 5712 /* If matrix was already created, we simply update values using SF objects 5713 * that as attached to the matrix earlier. 
5714 */ 5715 const PetscScalar *pd_a, *po_a; 5716 5717 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5718 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5719 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5720 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5721 /* Update values in place */ 5722 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5723 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5724 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5725 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5726 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5727 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5728 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5729 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5730 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5731 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5732 PetscFunctionReturn(PETSC_SUCCESS); 5733 } 5734 5735 /*@C 5736 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5737 5738 Collective 5739 5740 Input Parameters: 5741 + A - the first matrix in `MATMPIAIJ` format 5742 . B - the second matrix in `MATMPIAIJ` format 5743 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5744 5745 Output Parameters: 5746 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5747 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5748 - B_seq - the sequential matrix generated 5749 5750 Level: developer 5751 5752 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5753 @*/ 5754 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5755 { 5756 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5757 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5758 IS isrowb, iscolb; 5759 Mat *bseq = NULL; 5760 5761 PetscFunctionBegin; 5762 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5763 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5764 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5765 5766 if (scall == MAT_INITIAL_MATRIX) { 5767 start = A->cmap->rstart; 5768 cmap = a->garray; 5769 nzA = a->A->cmap->n; 5770 nzB = a->B->cmap->n; 5771 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5772 ncols = 0; 5773 for (i = 0; i < nzB; i++) { /* row < local row index */ 5774 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5775 else break; 5776 } 5777 imark = i; 5778 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5779 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5780 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5781 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5782 } else { 5783 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5784 isrowb = *rowb; 5785 iscolb = *colb; 5786 PetscCall(PetscMalloc1(1, &bseq)); 5787 bseq[0] = *B_seq; 5788 } 5789 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5790 *B_seq = bseq[0]; 5791 PetscCall(PetscFree(bseq)); 
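  /* hand the row/column index sets back to the caller if they were provided, otherwise destroy the ones created above */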
5792 if (!rowb) { 5793 PetscCall(ISDestroy(&isrowb)); 5794 } else { 5795 *rowb = isrowb; 5796 } 5797 if (!colb) { 5798 PetscCall(ISDestroy(&iscolb)); 5799 } else { 5800 *colb = iscolb; 5801 } 5802 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5803 PetscFunctionReturn(PETSC_SUCCESS); 5804 } 5805 5806 /* 5807 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5808 of the OFF-DIAGONAL portion of local A 5809 5810 Collective 5811 5812 Input Parameters: 5813 + A,B - the matrices in `MATMPIAIJ` format 5814 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5815 5816 Output Parameter: 5817 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5818 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5819 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5820 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5821 5822 Developer Note: 5823 This directly accesses information inside the VecScatter associated with the matrix-vector product 5824 for this matrix. This is not desirable.. 5825 5826 Level: developer 5827 5828 */ 5829 5830 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5831 { 5832 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5833 VecScatter ctx; 5834 MPI_Comm comm; 5835 const PetscMPIInt *rprocs, *sprocs; 5836 PetscMPIInt nrecvs, nsends; 5837 const PetscInt *srow, *rstarts, *sstarts; 5838 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5839 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5840 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5841 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5842 PetscMPIInt size, tag, rank, nreqs; 5843 5844 PetscFunctionBegin; 5845 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5846 PetscCallMPI(MPI_Comm_size(comm, &size)); 5847 5848 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5849 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5850 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5851 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5852 5853 if (size == 1) { 5854 startsj_s = NULL; 5855 bufa_ptr = NULL; 5856 *B_oth = NULL; 5857 PetscFunctionReturn(PETSC_SUCCESS); 5858 } 5859 5860 ctx = a->Mvctx; 5861 tag = ((PetscObject)ctx)->tag; 5862 5863 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5864 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5865 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5866 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5867 PetscCall(PetscMalloc1(nreqs, &reqs)); 5868 rwaits = reqs; 5869 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5870 5871 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5872 if (scall == MAT_INITIAL_MATRIX) { 5873 /* i-array */ 5874 /* post receives */ 5875 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5876 for (i = 0; i < nrecvs; i++) { 5877 rowlen = rvalues + rstarts[i] * rbs; 5878 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5879 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5880 } 5881 5882 /* pack the outgoing message */ 5883 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5884 5885 sstartsj[0] = 0; 5886 rstartsj[0] = 0; 5887 len = 0; /* total length of j or a array to be sent */ 5888 if (nsends) { 5889 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5890 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5891 } 5892 for (i = 0; i < nsends; i++) { 5893 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5894 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5895 for (j = 0; j < nrows; j++) { 5896 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5897 for (l = 0; l < sbs; l++) { 5898 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5899 5900 rowlen[j * sbs + l] = ncols; 5901 5902 len += ncols; 5903 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5904 } 5905 k++; 5906 } 5907 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5908 5909 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5910 } 5911 /* recvs and sends of i-array are completed */ 5912 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5913 PetscCall(PetscFree(svalues)); 5914 5915 /* allocate buffers for sending j and a arrays */ 5916 PetscCall(PetscMalloc1(len + 1, &bufj)); 5917 PetscCall(PetscMalloc1(len + 1, &bufa)); 5918 5919 /* create i-array of B_oth */ 5920 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5921 5922 b_othi[0] = 0; 5923 len = 0; /* total length of j or a array to be received */ 5924 k = 0; 5925 for (i = 0; i < nrecvs; i++) { 5926 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5927 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5928 for (j = 0; j < nrows; j++) { 5929 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5930 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5931 k++; 5932 } 5933 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5934 } 5935 PetscCall(PetscFree(rvalues)); 5936 5937 /* allocate space for j and a arrays of B_oth */ 5938 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5939 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5940 5941 /* j-array */ 5942 /* post receives of j-array */ 5943 for (i = 0; i < nrecvs; i++) { 5944 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5945 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5946 } 5947 5948 /* pack the outgoing message j-array */ 5949 if (nsends) k = sstarts[0]; 5950 for (i = 0; i < nsends; i++) { 5951 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5952 bufJ = bufj + sstartsj[i]; 5953 for (j = 0; j < nrows; j++) { 5954 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5955 for (ll = 0; ll < sbs; ll++) { 5956 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5957 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5958 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5959 } 5960 } 5961 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - 
sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5962 } 5963 5964 /* recvs and sends of j-array are completed */ 5965 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5966 } else if (scall == MAT_REUSE_MATRIX) { 5967 sstartsj = *startsj_s; 5968 rstartsj = *startsj_r; 5969 bufa = *bufa_ptr; 5970 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5971 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5972 5973 /* a-array */ 5974 /* post receives of a-array */ 5975 for (i = 0; i < nrecvs; i++) { 5976 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5977 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5978 } 5979 5980 /* pack the outgoing message a-array */ 5981 if (nsends) k = sstarts[0]; 5982 for (i = 0; i < nsends; i++) { 5983 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5984 bufA = bufa + sstartsj[i]; 5985 for (j = 0; j < nrows; j++) { 5986 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5987 for (ll = 0; ll < sbs; ll++) { 5988 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5989 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5990 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5991 } 5992 } 5993 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5994 } 5995 /* recvs and sends of a-array are completed */ 5996 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5997 PetscCall(PetscFree(reqs)); 5998 5999 if (scall == MAT_INITIAL_MATRIX) { 6000 Mat_SeqAIJ *b_oth; 6001 6002 /* put together the new matrix */ 6003 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6004 6005 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6006 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6007 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6008 b_oth->free_a = PETSC_TRUE; 6009 b_oth->free_ij = PETSC_TRUE; 6010 b_oth->nonew = 0; 6011 6012 PetscCall(PetscFree(bufj)); 6013 if (!startsj_s || !bufa_ptr) { 6014 PetscCall(PetscFree2(sstartsj, rstartsj)); 6015 PetscCall(PetscFree(bufa_ptr)); 6016 } else { 6017 *startsj_s = sstartsj; 6018 *startsj_r = rstartsj; 6019 *bufa_ptr = bufa; 6020 } 6021 } else if (scall == MAT_REUSE_MATRIX) { 6022 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6023 } 6024 6025 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6026 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6027 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6028 PetscFunctionReturn(PETSC_SUCCESS); 6029 } 6030 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6034 #if defined(PETSC_HAVE_MKL_SPARSE) 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6036 #endif 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6039 #if defined(PETSC_HAVE_ELEMENTAL) 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_SCALAPACK) 6043 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_HYPRE) 6046 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_CUDA) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_HIP) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6060 6061 /* 6062 Computes (B'*A')' since computing B*A directly is untenable 6063 6064 n p p 6065 [ ] [ ] [ ] 6066 m [ A ] * n [ B ] = m [ C ] 6067 [ ] [ ] [ ] 6068 6069 */ 6070 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6071 { 6072 Mat At, Bt, Ct; 6073 6074 PetscFunctionBegin; 6075 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6076 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6077 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6078 PetscCall(MatDestroy(&At)); 6079 PetscCall(MatDestroy(&Bt)); 6080 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6081 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6082 PetscCall(MatDestroy(&Ct)); 6083 PetscFunctionReturn(PETSC_SUCCESS); 6084 } 6085 6086 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6087 { 6088 PetscBool cisdense; 6089 6090 PetscFunctionBegin; 6091 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6092 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6093 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6094 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6095 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6096 PetscCall(MatSetUp(C)); 6097 6098 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6099 PetscFunctionReturn(PETSC_SUCCESS); 6100 } 6101 6102 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6103 { 6104 Mat_Product *product = C->product; 6105 Mat A = product->A, B = product->B; 6106 6107 PetscFunctionBegin; 6108 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6109 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6110 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6111 C->ops->productsymbolic = MatProductSymbolic_AB; 6112 PetscFunctionReturn(PETSC_SUCCESS); 6113 } 6114 6115 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6116 { 6117 Mat_Product *product = C->product; 6118 6119 PetscFunctionBegin; 6120 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6121 PetscFunctionReturn(PETSC_SUCCESS); 6122 } 6123 6124 /* 6125 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6126 6127 Input Parameters: 6128 6129 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6130 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6131 6132 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6133 6134 For Set1, j1[] contains column indices of the nonzeros. 6135 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6136 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6137 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6138 6139 Similar for Set2. 6140 6141 This routine merges the two sets of nonzeros row by row and removes repeats. 6142 6143 Output Parameters: (memory is allocated by the caller) 6144 6145 i[],j[]: the CSR of the merged matrix, which has m rows. 6146 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6147 imap2[]: similar to imap1[], but for Set2. 6148 Note we order nonzeros row-by-row and from left to right. 
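Example (hypothetical single-row data, added only for illustration):
    Set1: j1 = [1,1,3], jmap1 = [0,2,3]  (unique columns 1 and 3; column 1 is repeated twice)
    Set2: j2 = [2,3],   jmap2 = [0,1,2]  (unique columns 2 and 3)
    Merged row: i = [0,3], j = [1,2,3], imap1 = [0,2], imap2 = [1,2]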
6149 */ 6150 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6151 { 6152 PetscInt r, m; /* Row index of mat */ 6153 PetscCount t, t1, t2, b1, e1, b2, e2; 6154 6155 PetscFunctionBegin; 6156 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6157 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6158 i[0] = 0; 6159 for (r = 0; r < m; r++) { /* Do row by row merging */ 6160 b1 = rowBegin1[r]; 6161 e1 = rowEnd1[r]; 6162 b2 = rowBegin2[r]; 6163 e2 = rowEnd2[r]; 6164 while (b1 < e1 && b2 < e2) { 6165 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6166 j[t] = j1[b1]; 6167 imap1[t1] = t; 6168 imap2[t2] = t; 6169 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6170 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6171 t1++; 6172 t2++; 6173 t++; 6174 } else if (j1[b1] < j2[b2]) { 6175 j[t] = j1[b1]; 6176 imap1[t1] = t; 6177 b1 += jmap1[t1 + 1] - jmap1[t1]; 6178 t1++; 6179 t++; 6180 } else { 6181 j[t] = j2[b2]; 6182 imap2[t2] = t; 6183 b2 += jmap2[t2 + 1] - jmap2[t2]; 6184 t2++; 6185 t++; 6186 } 6187 } 6188 /* Merge the remaining in either j1[] or j2[] */ 6189 while (b1 < e1) { 6190 j[t] = j1[b1]; 6191 imap1[t1] = t; 6192 b1 += jmap1[t1 + 1] - jmap1[t1]; 6193 t1++; 6194 t++; 6195 } 6196 while (b2 < e2) { 6197 j[t] = j2[b2]; 6198 imap2[t2] = t; 6199 b2 += jmap2[t2 + 1] - jmap2[t2]; 6200 t2++; 6201 t++; 6202 } 6203 PetscCall(PetscIntCast(t, i + r + 1)); 6204 } 6205 PetscFunctionReturn(PETSC_SUCCESS); 6206 } 6207 6208 /* 6209 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6210 6211 Input Parameters: 6212 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6213 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6214 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6215 6216 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6217 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6218 6219 Output Parameters: 6220 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6221 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6222 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6223 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6224 6225 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6226 Atot: number of entries belonging to the diagonal block. 6227 Annz: number of unique nonzeros belonging to the diagonal block. 6228 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6229 repeats (i.e., same 'i,j' pair). 6230 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6231 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6232 6233 Atot: number of entries belonging to the diagonal block 6234 Annz: number of unique nonzeros belonging to the diagonal block. 6235 6236 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6237 6238 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6239 */ 6240 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6241 { 6242 PetscInt cstart, cend, rstart, rend, row, col; 6243 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6244 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6245 PetscCount k, m, p, q, r, s, mid; 6246 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6247 6248 PetscFunctionBegin; 6249 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6250 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6251 m = rend - rstart; 6252 6253 /* Skip negative rows */ 6254 for (k = 0; k < n; k++) 6255 if (i[k] >= 0) break; 6256 6257 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6258 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6259 */ 6260 while (k < n) { 6261 row = i[k]; 6262 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6263 for (s = k; s < n; s++) 6264 if (i[s] != row) break; 6265 6266 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6267 for (p = k; p < s; p++) { 6268 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6269 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6270 } 6271 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6272 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6273 rowBegin[row - rstart] = k; 6274 rowMid[row - rstart] = mid; 6275 rowEnd[row - rstart] = s; 6276 6277 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6278 Atot += mid - k; 6279 Btot += s - mid; 6280 6281 /* Count unique nonzeros of this diag row */ 6282 for (p = k; p < mid;) { 6283 col = j[p]; 6284 do { 6285 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6286 p++; 6287 } while (p < mid && j[p] == col); 6288 Annz++; 6289 } 6290 6291 /* Count unique nonzeros of this offdiag row */ 6292 for (p = mid; p < s;) { 6293 col = j[p]; 6294 do { 6295 p++; 6296 } while (p < s && j[p] == col); 6297 Bnnz++; 6298 } 6299 k = s; 6300 } 6301 6302 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6303 PetscCall(PetscMalloc1(Atot, &Aperm)); 6304 PetscCall(PetscMalloc1(Btot, &Bperm)); 6305 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6306 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6307 6308 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6309 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6310 for (r = 0; r < m; r++) { 6311 k = rowBegin[r]; 6312 mid 
= rowMid[r]; 6313 s = rowEnd[r]; 6314 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6315 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6316 Atot += mid - k; 6317 Btot += s - mid; 6318 6319 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6320 for (p = k; p < mid;) { 6321 col = j[p]; 6322 q = p; 6323 do { 6324 p++; 6325 } while (p < mid && j[p] == col); 6326 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6327 Annz++; 6328 } 6329 6330 for (p = mid; p < s;) { 6331 col = j[p]; 6332 q = p; 6333 do { 6334 p++; 6335 } while (p < s && j[p] == col); 6336 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6337 Bnnz++; 6338 } 6339 } 6340 /* Output */ 6341 *Aperm_ = Aperm; 6342 *Annz_ = Annz; 6343 *Atot_ = Atot; 6344 *Ajmap_ = Ajmap; 6345 *Bperm_ = Bperm; 6346 *Bnnz_ = Bnnz; 6347 *Btot_ = Btot; 6348 *Bjmap_ = Bjmap; 6349 PetscFunctionReturn(PETSC_SUCCESS); 6350 } 6351 6352 /* 6353 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6354 6355 Input Parameters: 6356 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6357 nnz: number of unique nonzeros in the merged matrix 6358 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6359 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6360 6361 Output Parameter: (memory is allocated by the caller) 6362 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6363 6364 Example: 6365 nnz1 = 4 6366 nnz = 6 6367 imap = [1,3,4,5] 6368 jmap = [0,3,5,6,7] 6369 then, 6370 jmap_new = [0,0,3,3,5,6,7] 6371 */ 6372 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6373 { 6374 PetscCount k, p; 6375 6376 PetscFunctionBegin; 6377 jmap_new[0] = 0; 6378 p = nnz; /* p loops over jmap_new[] backwards */ 6379 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6380 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6381 } 6382 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6383 PetscFunctionReturn(PETSC_SUCCESS); 6384 } 6385 6386 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6387 { 6388 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6389 6390 PetscFunctionBegin; 6391 PetscCall(PetscSFDestroy(&coo->sf)); 6392 PetscCall(PetscFree(coo->Aperm1)); 6393 PetscCall(PetscFree(coo->Bperm1)); 6394 PetscCall(PetscFree(coo->Ajmap1)); 6395 PetscCall(PetscFree(coo->Bjmap1)); 6396 PetscCall(PetscFree(coo->Aimap2)); 6397 PetscCall(PetscFree(coo->Bimap2)); 6398 PetscCall(PetscFree(coo->Aperm2)); 6399 PetscCall(PetscFree(coo->Bperm2)); 6400 PetscCall(PetscFree(coo->Ajmap2)); 6401 PetscCall(PetscFree(coo->Bjmap2)); 6402 PetscCall(PetscFree(coo->Cperm1)); 6403 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6404 PetscCall(PetscFree(coo)); 6405 PetscFunctionReturn(PETSC_SUCCESS); 6406 } 6407 6408 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6409 { 6410 MPI_Comm comm; 6411 PetscMPIInt rank, size; 6412 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6413 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6414 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6415 PetscContainer container; 6416 MatCOOStruct_MPIAIJ 
*coo; 6417 6418 PetscFunctionBegin; 6419 PetscCall(PetscFree(mpiaij->garray)); 6420 PetscCall(VecDestroy(&mpiaij->lvec)); 6421 #if defined(PETSC_USE_CTABLE) 6422 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6423 #else 6424 PetscCall(PetscFree(mpiaij->colmap)); 6425 #endif 6426 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6427 mat->assembled = PETSC_FALSE; 6428 mat->was_assembled = PETSC_FALSE; 6429 6430 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6431 PetscCallMPI(MPI_Comm_size(comm, &size)); 6432 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6433 PetscCall(PetscLayoutSetUp(mat->rmap)); 6434 PetscCall(PetscLayoutSetUp(mat->cmap)); 6435 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6436 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6437 PetscCall(MatGetLocalSize(mat, &m, &n)); 6438 PetscCall(MatGetSize(mat, &M, &N)); 6439 6440 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6441 /* entries come first, then local rows, then remote rows. */ 6442 PetscCount n1 = coo_n, *perm1; 6443 PetscInt *i1 = coo_i, *j1 = coo_j; 6444 6445 PetscCall(PetscMalloc1(n1, &perm1)); 6446 for (k = 0; k < n1; k++) perm1[k] = k; 6447 6448 /* Manipulate indices so that entries with negative row or col indices will have smallest 6449 row indices, local entries will have greater but negative row indices, and remote entries 6450 will have positive row indices. 6451 */ 6452 for (k = 0; k < n1; k++) { 6453 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6454 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6455 else { 6456 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6457 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6458 } 6459 } 6460 6461 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6462 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6463 6464 /* Advance k to the first entry we need to take care of */ 6465 for (k = 0; k < n1; k++) 6466 if (i1[k] > PETSC_INT_MIN) break; 6467 PetscCount i1start = k; 6468 6469 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6470 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6471 6472 /* Send remote rows to their owner */ 6473 /* Find which rows should be sent to which remote ranks*/ 6474 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6475 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6476 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6477 const PetscInt *ranges; 6478 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6479 6480 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6481 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6482 for (k = rem; k < n1;) { 6483 PetscMPIInt owner; 6484 PetscInt firstRow, lastRow; 6485 6486 /* Locate a row range */ 6487 firstRow = i1[k]; /* first row of this owner */ 6488 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6489 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6490 6491 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */ 6492 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6493 6494 /* All entries in [k,p) belong to this remote owner */ 6495 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6496 PetscMPIInt *sendto2; 6497 PetscInt *nentries2; 6498 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6499 6500 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6501 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6502 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6503 PetscCall(PetscFree2(sendto, nentries)); 6504 sendto = sendto2; 6505 nentries = nentries2; 6506 maxNsend = maxNsend2; 6507 } 6508 sendto[nsend] = owner; 6509 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6510 nsend++; 6511 k = p; 6512 } 6513 6514 /* Build 1st SF to know offsets on remote to send data */ 6515 PetscSF sf1; 6516 PetscInt nroots = 1, nroots2 = 0; 6517 PetscInt nleaves = nsend, nleaves2 = 0; 6518 PetscInt *offsets; 6519 PetscSFNode *iremote; 6520 6521 PetscCall(PetscSFCreate(comm, &sf1)); 6522 PetscCall(PetscMalloc1(nsend, &iremote)); 6523 PetscCall(PetscMalloc1(nsend, &offsets)); 6524 for (k = 0; k < nsend; k++) { 6525 iremote[k].rank = sendto[k]; 6526 iremote[k].index = 0; 6527 nleaves2 += nentries[k]; 6528 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6529 } 6530 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6531 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6532 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6533 PetscCall(PetscSFDestroy(&sf1)); 6534 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6535 6536 /* Build 2nd SF to send remote COOs to their owner */ 6537 PetscSF sf2; 6538 nroots = nroots2; 6539 nleaves = nleaves2; 6540 PetscCall(PetscSFCreate(comm, &sf2)); 6541 PetscCall(PetscSFSetFromOptions(sf2)); 6542 PetscCall(PetscMalloc1(nleaves, &iremote)); 6543 p = 0; 6544 for (k = 0; k < nsend; k++) { 6545 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6546 for (q = 0; q < nentries[k]; q++, p++) { 6547 iremote[p].rank = sendto[k]; 6548 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6549 } 6550 } 6551 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6552 6553 /* Send the remote COOs to their owner */ 6554 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6555
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6556 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6557 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6558 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6559 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6560 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6561 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6563 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6564 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6565 6566 PetscCall(PetscFree(offsets)); 6567 PetscCall(PetscFree2(sendto, nentries)); 6568 6569 /* Sort received COOs by row along with the permutation array */ 6570 for (k = 0; k < n2; k++) perm2[k] = k; 6571 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6572 6573 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6574 PetscCount *Cperm1; 6575 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6576 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6577 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6578 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6579 6580 /* Support for HYPRE matrices, kind of a hack. 6581 Swap min column with diagonal so that diagonal values will go first */ 6582 PetscBool hypre; 6583 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6584 if (hypre) { 6585 PetscInt *minj; 6586 PetscBT hasdiag; 6587 6588 PetscCall(PetscBTCreate(m, &hasdiag)); 6589 PetscCall(PetscMalloc1(m, &minj)); 6590 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6591 for (k = i1start; k < rem; k++) { 6592 if (j1[k] < cstart || j1[k] >= cend) continue; 6593 const PetscInt rindex = i1[k] - rstart; 6594 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6595 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6596 } 6597 for (k = 0; k < n2; k++) { 6598 if (j2[k] < cstart || j2[k] >= cend) continue; 6599 const PetscInt rindex = i2[k] - rstart; 6600 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6601 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6602 } 6603 for (k = i1start; k < rem; k++) { 6604 const PetscInt rindex = i1[k] - rstart; 6605 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6606 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6607 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6608 } 6609 for (k = 0; k < n2; k++) { 6610 const PetscInt rindex = i2[k] - rstart; 6611 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6612 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6613 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6614 } 6615 PetscCall(PetscBTDestroy(&hasdiag)); 6616 PetscCall(PetscFree(minj)); 6617 } 6618 6619 /* Split local COOs and received COOs into diag/offdiag portions */ 6620 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6621 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6622 PetscCount 
Annz1, Bnnz1, Atot1, Btot1; 6623 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6624 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6625 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6626 6627 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6628 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6629 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6630 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6631 6632 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6633 PetscInt *Ai, *Bi; 6634 PetscInt *Aj, *Bj; 6635 6636 PetscCall(PetscMalloc1(m + 1, &Ai)); 6637 PetscCall(PetscMalloc1(m + 1, &Bi)); 6638 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6639 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6640 6641 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6642 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6643 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6644 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6645 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6646 6647 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6648 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6649 6650 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6651 /* expect nonzeros in A/B most likely have local contributing entries */ 6652 PetscInt Annz = Ai[m]; 6653 PetscInt Bnnz = Bi[m]; 6654 PetscCount *Ajmap1_new, *Bjmap1_new; 6655 6656 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6657 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6658 6659 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6660 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6661 6662 PetscCall(PetscFree(Aimap1)); 6663 PetscCall(PetscFree(Ajmap1)); 6664 PetscCall(PetscFree(Bimap1)); 6665 PetscCall(PetscFree(Bjmap1)); 6666 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6667 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6668 PetscCall(PetscFree(perm1)); 6669 PetscCall(PetscFree3(i2, j2, perm2)); 6670 6671 Ajmap1 = Ajmap1_new; 6672 Bjmap1 = Bjmap1_new; 6673 6674 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6675 if (Annz < Annz1 + Annz2) { 6676 PetscInt *Aj_new; 6677 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6678 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6679 PetscCall(PetscFree(Aj)); 6680 Aj = Aj_new; 6681 } 6682 6683 if (Bnnz < Bnnz1 + Bnnz2) { 6684 PetscInt *Bj_new; 6685 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6686 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6687 PetscCall(PetscFree(Bj)); 6688 Bj = Bj_new; 6689 } 6690 6691 /* Create new submatrices for on-process and off-process coupling */ 6692 PetscScalar *Aa, *Ba; 6693 MatType rtype; 6694 Mat_SeqAIJ *a, *b; 6695 PetscObjectState state; 6696 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6697 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6698 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6699 if (cstart) { 6700 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6701 } 6702 6703 PetscCall(MatGetRootType_Private(mat, &rtype)); 6704 6705 
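/* Rebuild the diagonal (A) and off-diagonal (B) blocks from the merged CSR arrays; they are created here as host SEQAIJ and converted back to the root type (possibly a device subclass) after the multiply setup below */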
MatSeqXAIJGetOptions_Private(mpiaij->A); 6706 PetscCall(MatDestroy(&mpiaij->A)); 6707 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6708 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6709 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6710 6711 MatSeqXAIJGetOptions_Private(mpiaij->B); 6712 PetscCall(MatDestroy(&mpiaij->B)); 6713 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6714 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6715 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6716 6717 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6718 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6719 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6720 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6721 6722 a = (Mat_SeqAIJ *)mpiaij->A->data; 6723 b = (Mat_SeqAIJ *)mpiaij->B->data; 6724 a->free_a = PETSC_TRUE; 6725 a->free_ij = PETSC_TRUE; 6726 b->free_a = PETSC_TRUE; 6727 b->free_ij = PETSC_TRUE; 6728 a->maxnz = a->nz; 6729 b->maxnz = b->nz; 6730 6731 /* conversion must happen AFTER multiply setup */ 6732 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6733 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6734 PetscCall(VecDestroy(&mpiaij->lvec)); 6735 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6736 6737 // Put the COO struct in a container and then attach that to the matrix 6738 PetscCall(PetscMalloc1(1, &coo)); 6739 coo->n = coo_n; 6740 coo->sf = sf2; 6741 coo->sendlen = nleaves; 6742 coo->recvlen = nroots; 6743 coo->Annz = Annz; 6744 coo->Bnnz = Bnnz; 6745 coo->Annz2 = Annz2; 6746 coo->Bnnz2 = Bnnz2; 6747 coo->Atot1 = Atot1; 6748 coo->Atot2 = Atot2; 6749 coo->Btot1 = Btot1; 6750 coo->Btot2 = Btot2; 6751 coo->Ajmap1 = Ajmap1; 6752 coo->Aperm1 = Aperm1; 6753 coo->Bjmap1 = Bjmap1; 6754 coo->Bperm1 = Bperm1; 6755 coo->Aimap2 = Aimap2; 6756 coo->Ajmap2 = Ajmap2; 6757 coo->Aperm2 = Aperm2; 6758 coo->Bimap2 = Bimap2; 6759 coo->Bjmap2 = Bjmap2; 6760 coo->Bperm2 = Bperm2; 6761 coo->Cperm1 = Cperm1; 6762 // Allocate in preallocation. 
If not used, it has zero cost on host 6763 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6764 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6765 PetscCall(PetscContainerSetPointer(container, coo)); 6766 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6767 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6768 PetscCall(PetscContainerDestroy(&container)); 6769 PetscFunctionReturn(PETSC_SUCCESS); 6770 } 6771 6772 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6773 { 6774 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6775 Mat A = mpiaij->A, B = mpiaij->B; 6776 PetscScalar *Aa, *Ba; 6777 PetscScalar *sendbuf, *recvbuf; 6778 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6779 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6780 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6781 const PetscCount *Cperm1; 6782 PetscContainer container; 6783 MatCOOStruct_MPIAIJ *coo; 6784 6785 PetscFunctionBegin; 6786 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6787 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6788 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6789 sendbuf = coo->sendbuf; 6790 recvbuf = coo->recvbuf; 6791 Ajmap1 = coo->Ajmap1; 6792 Ajmap2 = coo->Ajmap2; 6793 Aimap2 = coo->Aimap2; 6794 Bjmap1 = coo->Bjmap1; 6795 Bjmap2 = coo->Bjmap2; 6796 Bimap2 = coo->Bimap2; 6797 Aperm1 = coo->Aperm1; 6798 Aperm2 = coo->Aperm2; 6799 Bperm1 = coo->Bperm1; 6800 Bperm2 = coo->Bperm2; 6801 Cperm1 = coo->Cperm1; 6802 6803 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6804 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6805 6806 /* Pack entries to be sent to remote */ 6807 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6808 6809 /* Send remote entries to their owner and overlap the communication with local computation */ 6810 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6811 /* Add local entries to A and B */ 6812 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6813 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6814 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6815 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6816 } 6817 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6818 PetscScalar sum = 0.0; 6819 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6820 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6821 } 6822 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6823 6824 /* Add received remote entries to A and B */ 6825 for (PetscCount i = 0; i < coo->Annz2; i++) { 6826 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6827 } 6828 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6829 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6830 } 6831 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6832 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6833 PetscFunctionReturn(PETSC_SUCCESS); 6834 } 6835 6836 /*MC 6837 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6838 6839 Options Database Keys: 6840 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6841 6842 Level: beginner 6843 6844 Notes: 6845 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6846 in this case the values associated with the rows and columns one passes in are set to zero 6847 in the matrix 6848 6849 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6850 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6851 6852 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6853 M*/ 6854 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6855 { 6856 Mat_MPIAIJ *b; 6857 PetscMPIInt size; 6858 6859 PetscFunctionBegin; 6860 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6861 6862 PetscCall(PetscNew(&b)); 6863 B->data = (void *)b; 6864 B->ops[0] = MatOps_Values; 6865 B->assembled = PETSC_FALSE; 6866 B->insertmode = NOT_SET_VALUES; 6867 b->size = size; 6868 6869 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6870 6871 /* build cache for off array entries formed */ 6872 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6873 6874 b->donotstash = PETSC_FALSE; 6875 b->colmap = NULL; 6876 b->garray = NULL; 6877 b->roworiented = PETSC_TRUE; 6878 6879 /* stuff used for matrix vector multiply */ 6880 b->lvec = NULL; 6881 b->Mvctx = NULL; 6882 6883 /* stuff for MatGetRow() */ 6884 b->rowindices = NULL; 6885 b->rowvalues = NULL; 6886 b->getrowactive = PETSC_FALSE; 6887 6888 /* flexible pointer used in CUSPARSE classes */ 6889 b->spptr = NULL; 6890 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6901 #if defined(PETSC_HAVE_CUDA) 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6903 #endif 6904 #if defined(PETSC_HAVE_HIP) 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6906 #endif 6907 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6909 #endif 6910 #if defined(PETSC_HAVE_MKL_SPARSE) 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6912 #endif 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6917 #if defined(PETSC_HAVE_ELEMENTAL) 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6919 #endif 6920 #if defined(PETSC_HAVE_SCALAPACK) 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6922 #endif 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6925 #if defined(PETSC_HAVE_HYPRE) 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6928 #endif 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6933 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6934 PetscFunctionReturn(PETSC_SUCCESS); 6935 } 6936 6937 /*@ 6938 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6939 and "off-diagonal" part of the matrix in CSR format. 6940 6941 Collective 6942 6943 Input Parameters: 6944 + comm - MPI communicator 6945 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6946 . n - This value should be the same as the local size used in creating the 6947 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6948 calculated if `N` is given) For square matrices `n` is almost always `m`. 6949 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6950 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6951 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6952 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6953 . a - matrix values 6954 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6955 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6956 - oa - matrix values 6957 6958 Output Parameter: 6959 . mat - the matrix 6960 6961 Level: advanced 6962 6963 Notes: 6964 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6965 must free the arrays once the matrix has been destroyed and not before. 6966 6967 The `i` and `j` indices are 0 based 6968 6969 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6970 6971 This sets local rows and cannot be used to set off-processor values. 6972 6973 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6974 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6975 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6976 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6977 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6978 communication if it is known that only local entries will be set. 
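Example Usage:
A hypothetical sketch (values and layout invented for illustration) for one rank of a 4 x 8 matrix distributed over two MPI ranks, where this rank owns rows 0-1 and columns 0-3, so its "diagonal" block is 2 x 4 and its "off-diagonal" block holds global columns 4-7
.vb
  PetscInt    i[]  = {0, 1, 2};  /* CSR row pointers of the diagonal block */
  PetscInt    j[]  = {0, 2};     /* local column indices, relative to this rank's first owned column */
  PetscScalar a[]  = {1.0, 2.0};
  PetscInt    oi[] = {0, 1, 2};  /* CSR row pointers of the off-diagonal block */
  PetscInt    oj[] = {4, 7};     /* global column indices owned by the other rank */
  PetscScalar oa[] = {3.0, 4.0};
  Mat         A;

  PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 4, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, oi, oj, oa, &A));
  /* ... use A; the arrays are not copied and must remain valid until after MatDestroy(&A) */
.ve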
6979 6980 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6981 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6982 @*/ 6983 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6984 { 6985 Mat_MPIAIJ *maij; 6986 6987 PetscFunctionBegin; 6988 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6989 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6990 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6991 PetscCall(MatCreate(comm, mat)); 6992 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6993 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6994 maij = (Mat_MPIAIJ *)(*mat)->data; 6995 6996 (*mat)->preallocated = PETSC_TRUE; 6997 6998 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6999 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7000 7001 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7002 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7003 7004 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7005 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7006 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7007 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7008 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7009 PetscFunctionReturn(PETSC_SUCCESS); 7010 } 7011 7012 typedef struct { 7013 Mat *mp; /* intermediate products */ 7014 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7015 PetscInt cp; /* number of intermediate products */ 7016 7017 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7018 PetscInt *startsj_s, *startsj_r; 7019 PetscScalar *bufa; 7020 Mat P_oth; 7021 7022 /* may take advantage of merging product->B */ 7023 Mat Bloc; /* B-local by merging diag and off-diag */ 7024 7025 /* cusparse does not have support to split between symbolic and numeric phases. 7026 When api_user is true, we don't need to update the numerical values 7027 of the temporary storage */ 7028 PetscBool reusesym; 7029 7030 /* support for COO values insertion */ 7031 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7032 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7033 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7034 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7035 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7036 PetscMemType mtype; 7037 7038 /* customization */ 7039 PetscBool abmerge; 7040 PetscBool P_oth_bind; 7041 } MatMatMPIAIJBACKEND; 7042 7043 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7044 { 7045 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7046 PetscInt i; 7047 7048 PetscFunctionBegin; 7049 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7050 PetscCall(PetscFree(mmdata->bufa)); 7051 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7052 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7053 PetscCall(MatDestroy(&mmdata->P_oth)); 7054 PetscCall(MatDestroy(&mmdata->Bloc)); 7055 PetscCall(PetscSFDestroy(&mmdata->sf)); 7056 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7057 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7058 PetscCall(PetscFree(mmdata->own[0])); 7059 PetscCall(PetscFree(mmdata->own)); 7060 PetscCall(PetscFree(mmdata->off[0])); 7061 PetscCall(PetscFree(mmdata->off)); 7062 PetscCall(PetscFree(mmdata)); 7063 PetscFunctionReturn(PETSC_SUCCESS); 7064 } 7065 7066 /* Copy selected n entries with indices in idx[] of A to v[]. 7067 If idx is NULL, copy the whole data array of A to v[] 7068 */ 7069 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7070 { 7071 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7072 7073 PetscFunctionBegin; 7074 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7075 if (f) { 7076 PetscCall((*f)(A, n, idx, v)); 7077 } else { 7078 const PetscScalar *vv; 7079 7080 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7081 if (n && idx) { 7082 PetscScalar *w = v; 7083 const PetscInt *oi = idx; 7084 PetscInt j; 7085 7086 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7087 } else { 7088 PetscCall(PetscArraycpy(v, vv, n)); 7089 } 7090 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7091 } 7092 PetscFunctionReturn(PETSC_SUCCESS); 7093 } 7094 7095 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7096 { 7097 MatMatMPIAIJBACKEND *mmdata; 7098 PetscInt i, n_d, n_o; 7099 7100 PetscFunctionBegin; 7101 MatCheckProduct(C, 1); 7102 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7103 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7104 if (!mmdata->reusesym) { /* update temporary matrices */ 7105 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7106 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7107 } 7108 mmdata->reusesym = PETSC_FALSE; 7109 7110 for (i = 0; i < mmdata->cp; i++) { 7111 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7112 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7113 } 7114 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7115 PetscInt noff; 7116 7117 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7118 if (mmdata->mptmp[i]) continue; 7119 if (noff) { 7120 PetscInt nown; 7121 7122 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7123 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], 
mmdata->coo_w + n_o)); 7124 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7125 n_o += noff; 7126 n_d += nown; 7127 } else { 7128 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7129 7130 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7131 n_d += mm->nz; 7132 } 7133 } 7134 if (mmdata->hasoffproc) { /* offprocess insertion */ 7135 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7136 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7137 } 7138 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7139 PetscFunctionReturn(PETSC_SUCCESS); 7140 } 7141 7142 /* Support for Pt * A, A * P, or Pt * A * P */ 7143 #define MAX_NUMBER_INTERMEDIATE 4 7144 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7145 { 7146 Mat_Product *product = C->product; 7147 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7148 Mat_MPIAIJ *a, *p; 7149 MatMatMPIAIJBACKEND *mmdata; 7150 ISLocalToGlobalMapping P_oth_l2g = NULL; 7151 IS glob = NULL; 7152 const char *prefix; 7153 char pprefix[256]; 7154 const PetscInt *globidx, *P_oth_idx; 7155 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7156 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7157 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7158 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7159 /* a base offset; type-2: sparse with a local to global map table */ 7160 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7161 7162 MatProductType ptype; 7163 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7164 PetscMPIInt size; 7165 7166 PetscFunctionBegin; 7167 MatCheckProduct(C, 1); 7168 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7169 ptype = product->type; 7170 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7171 ptype = MATPRODUCT_AB; 7172 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7173 } 7174 switch (ptype) { 7175 case MATPRODUCT_AB: 7176 A = product->A; 7177 P = product->B; 7178 m = A->rmap->n; 7179 n = P->cmap->n; 7180 M = A->rmap->N; 7181 N = P->cmap->N; 7182 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7183 break; 7184 case MATPRODUCT_AtB: 7185 P = product->A; 7186 A = product->B; 7187 m = P->cmap->n; 7188 n = A->cmap->n; 7189 M = P->cmap->N; 7190 N = A->cmap->N; 7191 hasoffproc = PETSC_TRUE; 7192 break; 7193 case MATPRODUCT_PtAP: 7194 A = product->A; 7195 P = product->B; 7196 m = P->cmap->n; 7197 n = P->cmap->n; 7198 M = P->cmap->N; 7199 N = P->cmap->N; 7200 hasoffproc = PETSC_TRUE; 7201 break; 7202 default: 7203 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7204 } 7205 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7206 if (size == 1) hasoffproc = PETSC_FALSE; 7207 7208 /* defaults */ 7209 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7210 mp[i] = NULL; 7211 mptmp[i] = PETSC_FALSE; 7212 rmapt[i] = -1; 7213 cmapt[i] = -1; 7214 rmapa[i] = NULL; 7215 cmapa[i] = NULL; 7216 } 7217 7218 /* customization */ 7219 PetscCall(PetscNew(&mmdata)); 7220 mmdata->reusesym = product->api_user; 7221 if (ptype == 
MATPRODUCT_AB) { 7222 if (product->api_user) { 7223 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7224 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7225 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7226 PetscOptionsEnd(); 7227 } else { 7228 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7229 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7230 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7231 PetscOptionsEnd(); 7232 } 7233 } else if (ptype == MATPRODUCT_PtAP) { 7234 if (product->api_user) { 7235 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7236 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7237 PetscOptionsEnd(); 7238 } else { 7239 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7240 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7241 PetscOptionsEnd(); 7242 } 7243 } 7244 a = (Mat_MPIAIJ *)A->data; 7245 p = (Mat_MPIAIJ *)P->data; 7246 PetscCall(MatSetSizes(C, m, n, M, N)); 7247 PetscCall(PetscLayoutSetUp(C->rmap)); 7248 PetscCall(PetscLayoutSetUp(C->cmap)); 7249 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7250 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7251 7252 cp = 0; 7253 switch (ptype) { 7254 case MATPRODUCT_AB: /* A * P */ 7255 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7256 7257 /* A_diag * P_local (merged or not) */ 7258 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7259 /* P is product->B */ 7260 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7261 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7262 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7263 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7264 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7265 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7266 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7267 mp[cp]->product->api_user = product->api_user; 7268 PetscCall(MatProductSetFromOptions(mp[cp])); 7269 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7270 PetscCall(ISGetIndices(glob, &globidx)); 7271 rmapt[cp] = 1; 7272 cmapt[cp] = 2; 7273 cmapa[cp] = globidx; 7274 mptmp[cp] = PETSC_FALSE; 7275 cp++; 7276 } else { /* A_diag * P_diag and A_diag * P_off */ 7277 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7278 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7279 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7280 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7281 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7282 PetscCall(MatAppendOptionsPrefix(mp[cp], 
pprefix)); 7283 mp[cp]->product->api_user = product->api_user; 7284 PetscCall(MatProductSetFromOptions(mp[cp])); 7285 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7286 rmapt[cp] = 1; 7287 cmapt[cp] = 1; 7288 mptmp[cp] = PETSC_FALSE; 7289 cp++; 7290 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7291 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7292 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7293 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7294 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7295 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7296 mp[cp]->product->api_user = product->api_user; 7297 PetscCall(MatProductSetFromOptions(mp[cp])); 7298 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7299 rmapt[cp] = 1; 7300 cmapt[cp] = 2; 7301 cmapa[cp] = p->garray; 7302 mptmp[cp] = PETSC_FALSE; 7303 cp++; 7304 } 7305 7306 /* A_off * P_other */ 7307 if (mmdata->P_oth) { 7308 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7309 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7310 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7311 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7312 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7313 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7314 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7315 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7316 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7317 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7318 mp[cp]->product->api_user = product->api_user; 7319 PetscCall(MatProductSetFromOptions(mp[cp])); 7320 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7321 rmapt[cp] = 1; 7322 cmapt[cp] = 2; 7323 cmapa[cp] = P_oth_idx; 7324 mptmp[cp] = PETSC_FALSE; 7325 cp++; 7326 } 7327 break; 7328 7329 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7330 /* A is product->B */ 7331 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7332 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7333 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7334 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7335 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7336 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7337 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7338 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7339 mp[cp]->product->api_user = product->api_user; 7340 PetscCall(MatProductSetFromOptions(mp[cp])); 7341 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7342 PetscCall(ISGetIndices(glob, &globidx)); 7343 rmapt[cp] = 2; 7344 rmapa[cp] = globidx; 7345 cmapt[cp] = 2; 7346 cmapa[cp] = globidx; 7347 mptmp[cp] = PETSC_FALSE; 7348 cp++; 7349 } else { 7350 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7351 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7352 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7353 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7354 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7355 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7356 mp[cp]->product->api_user = product->api_user; 7357 PetscCall(MatProductSetFromOptions(mp[cp])); 7358 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 
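/* descriptive note: this first product, P_diag^T * A_loc, fills the locally owned rows of C consecutively (type-1 row map, offset by the row start), while its columns are A_loc's merged local columns and are translated to global indices through glob (type-2 column map); both maps are recorded just below */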
7359 PetscCall(ISGetIndices(glob, &globidx)); 7360 rmapt[cp] = 1; 7361 cmapt[cp] = 2; 7362 cmapa[cp] = globidx; 7363 mptmp[cp] = PETSC_FALSE; 7364 cp++; 7365 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7366 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7367 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7368 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7369 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7370 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7371 mp[cp]->product->api_user = product->api_user; 7372 PetscCall(MatProductSetFromOptions(mp[cp])); 7373 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7374 rmapt[cp] = 2; 7375 rmapa[cp] = p->garray; 7376 cmapt[cp] = 2; 7377 cmapa[cp] = globidx; 7378 mptmp[cp] = PETSC_FALSE; 7379 cp++; 7380 } 7381 break; 7382 case MATPRODUCT_PtAP: 7383 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7384 /* P is product->B */ 7385 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7386 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7387 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7388 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7389 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7390 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7391 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7392 mp[cp]->product->api_user = product->api_user; 7393 PetscCall(MatProductSetFromOptions(mp[cp])); 7394 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7395 PetscCall(ISGetIndices(glob, &globidx)); 7396 rmapt[cp] = 2; 7397 rmapa[cp] = globidx; 7398 cmapt[cp] = 2; 7399 cmapa[cp] = globidx; 7400 mptmp[cp] = PETSC_FALSE; 7401 cp++; 7402 if (mmdata->P_oth) { 7403 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7404 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7405 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7406 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7407 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7408 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7409 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7410 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7411 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7412 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7413 mp[cp]->product->api_user = product->api_user; 7414 PetscCall(MatProductSetFromOptions(mp[cp])); 7415 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7416 mptmp[cp] = PETSC_TRUE; 7417 cp++; 7418 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7419 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7420 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7421 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7422 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7423 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7424 mp[cp]->product->api_user = product->api_user; 7425 PetscCall(MatProductSetFromOptions(mp[cp])); 7426 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7427 rmapt[cp] = 2; 7428 rmapa[cp] = globidx; 7429 cmapt[cp] = 2; 7430 cmapa[cp] = P_oth_idx; 7431 mptmp[cp] = PETSC_FALSE; 7432 cp++; 7433 } 7434 break; 7435 default: 7436 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, 
"Not for product type %s", MatProductTypes[ptype]); 7437 } 7438 /* sanity check */ 7439 if (size > 1) 7440 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7441 7442 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7443 for (i = 0; i < cp; i++) { 7444 mmdata->mp[i] = mp[i]; 7445 mmdata->mptmp[i] = mptmp[i]; 7446 } 7447 mmdata->cp = cp; 7448 C->product->data = mmdata; 7449 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7450 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7451 7452 /* memory type */ 7453 mmdata->mtype = PETSC_MEMTYPE_HOST; 7454 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7455 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7456 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7457 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7458 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7459 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7460 7461 /* prepare coo coordinates for values insertion */ 7462 7463 /* count total nonzeros of those intermediate seqaij Mats 7464 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7465 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7466 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7467 */ 7468 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7469 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7470 if (mptmp[cp]) continue; 7471 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7472 const PetscInt *rmap = rmapa[cp]; 7473 const PetscInt mr = mp[cp]->rmap->n; 7474 const PetscInt rs = C->rmap->rstart; 7475 const PetscInt re = C->rmap->rend; 7476 const PetscInt *ii = mm->i; 7477 for (i = 0; i < mr; i++) { 7478 const PetscInt gr = rmap[i]; 7479 const PetscInt nz = ii[i + 1] - ii[i]; 7480 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7481 else ncoo_oown += nz; /* this row is local */ 7482 } 7483 } else ncoo_d += mm->nz; 7484 } 7485 7486 /* 7487 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7488 7489 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7490 7491 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7492 7493 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7494 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7495 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7496 7497 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7498 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 
7499 */ 7500 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7501 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7502 7503 /* gather (i,j) of nonzeros inserted by remote procs */ 7504 if (hasoffproc) { 7505 PetscSF msf; 7506 PetscInt ncoo2, *coo_i2, *coo_j2; 7507 7508 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7509 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7510 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7511 7512 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7513 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7514 PetscInt *idxoff = mmdata->off[cp]; 7515 PetscInt *idxown = mmdata->own[cp]; 7516 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7517 const PetscInt *rmap = rmapa[cp]; 7518 const PetscInt *cmap = cmapa[cp]; 7519 const PetscInt *ii = mm->i; 7520 PetscInt *coi = coo_i + ncoo_o; 7521 PetscInt *coj = coo_j + ncoo_o; 7522 const PetscInt mr = mp[cp]->rmap->n; 7523 const PetscInt rs = C->rmap->rstart; 7524 const PetscInt re = C->rmap->rend; 7525 const PetscInt cs = C->cmap->rstart; 7526 for (i = 0; i < mr; i++) { 7527 const PetscInt *jj = mm->j + ii[i]; 7528 const PetscInt gr = rmap[i]; 7529 const PetscInt nz = ii[i + 1] - ii[i]; 7530 if (gr < rs || gr >= re) { /* this is an offproc row */ 7531 for (j = ii[i]; j < ii[i + 1]; j++) { 7532 *coi++ = gr; 7533 *idxoff++ = j; 7534 } 7535 if (!cmapt[cp]) { /* already global */ 7536 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7537 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7538 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7539 } else { /* offdiag */ 7540 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7541 } 7542 ncoo_o += nz; 7543 } else { /* this is a local row */ 7544 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7545 } 7546 } 7547 } 7548 mmdata->off[cp + 1] = idxoff; 7549 mmdata->own[cp + 1] = idxown; 7550 } 7551 7552 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7553 PetscInt incoo_o; 7554 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7555 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7556 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7557 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7558 ncoo = ncoo_d + ncoo_oown + ncoo2; 7559 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7560 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7561 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7562 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7563 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7564 PetscCall(PetscFree2(coo_i, coo_j)); 7565 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7566 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7567 coo_i = coo_i2; 7568 coo_j = coo_j2; 7569 } else { /* no offproc values insertion */ 7570 ncoo = ncoo_d; 7571 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7572 7573 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7574 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7575 PetscCall(PetscSFSetUp(mmdata->sf)); 7576 } 7577 
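  /* at this point mmdata->sf is ready: with off-process insertions it maps each locally computed remote nonzero (leaf) to the owning row of C (root), and the multi-SF root count ncoo2 sized the receive segment, so the gathered remote (i,j) already sit at the back of coo_i/coo_j; without them the SF is empty and ncoo is simply ncoo_d. The leading ncoo_d + ncoo_oown entries of coo_i/coo_j are filled by the loop below */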
mmdata->hasoffproc = hasoffproc; 7578 7579 /* gather (i,j) of nonzeros inserted locally */ 7580 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7581 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7582 PetscInt *coi = coo_i + ncoo_d; 7583 PetscInt *coj = coo_j + ncoo_d; 7584 const PetscInt *jj = mm->j; 7585 const PetscInt *ii = mm->i; 7586 const PetscInt *cmap = cmapa[cp]; 7587 const PetscInt *rmap = rmapa[cp]; 7588 const PetscInt mr = mp[cp]->rmap->n; 7589 const PetscInt rs = C->rmap->rstart; 7590 const PetscInt re = C->rmap->rend; 7591 const PetscInt cs = C->cmap->rstart; 7592 7593 if (mptmp[cp]) continue; 7594 if (rmapt[cp] == 1) { /* consecutive rows */ 7595 /* fill coo_i */ 7596 for (i = 0; i < mr; i++) { 7597 const PetscInt gr = i + rs; 7598 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7599 } 7600 /* fill coo_j */ 7601 if (!cmapt[cp]) { /* type-0, already global */ 7602 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7603 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7604 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7605 } else { /* type-2, local to global for sparse columns */ 7606 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7607 } 7608 ncoo_d += mm->nz; 7609 } else if (rmapt[cp] == 2) { /* sparse rows */ 7610 for (i = 0; i < mr; i++) { 7611 const PetscInt *jj = mm->j + ii[i]; 7612 const PetscInt gr = rmap[i]; 7613 const PetscInt nz = ii[i + 1] - ii[i]; 7614 if (gr >= rs && gr < re) { /* local rows */ 7615 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7616 if (!cmapt[cp]) { /* type-0, already global */ 7617 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7618 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7619 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7620 } else { /* type-2, local to global for sparse columns */ 7621 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7622 } 7623 ncoo_d += nz; 7624 } 7625 } 7626 } 7627 } 7628 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7629 PetscCall(ISDestroy(&glob)); 7630 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7631 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7632 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7633 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7634 7635 /* preallocate with COO data */ 7636 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7637 PetscCall(PetscFree2(coo_i, coo_j)); 7638 PetscFunctionReturn(PETSC_SUCCESS); 7639 } 7640 7641 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7642 { 7643 Mat_Product *product = mat->product; 7644 #if defined(PETSC_HAVE_DEVICE) 7645 PetscBool match = PETSC_FALSE; 7646 PetscBool usecpu = PETSC_FALSE; 7647 #else 7648 PetscBool match = PETSC_TRUE; 7649 #endif 7650 7651 PetscFunctionBegin; 7652 MatCheckProduct(mat, 1); 7653 #if defined(PETSC_HAVE_DEVICE) 7654 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7655 if (match) { /* we can always fallback to the CPU if requested */ 7656 switch (product->type) { 7657 case MATPRODUCT_AB: 7658 if (product->api_user) { 7659 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7660 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7661 
PetscOptionsEnd(); 7662 } else { 7663 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7664 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7665 PetscOptionsEnd(); 7666 } 7667 break; 7668 case MATPRODUCT_AtB: 7669 if (product->api_user) { 7670 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7671 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7672 PetscOptionsEnd(); 7673 } else { 7674 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7675 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7676 PetscOptionsEnd(); 7677 } 7678 break; 7679 case MATPRODUCT_PtAP: 7680 if (product->api_user) { 7681 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7682 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7683 PetscOptionsEnd(); 7684 } else { 7685 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7686 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7687 PetscOptionsEnd(); 7688 } 7689 break; 7690 default: 7691 break; 7692 } 7693 match = (PetscBool)!usecpu; 7694 } 7695 #endif 7696 if (match) { 7697 switch (product->type) { 7698 case MATPRODUCT_AB: 7699 case MATPRODUCT_AtB: 7700 case MATPRODUCT_PtAP: 7701 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7702 break; 7703 default: 7704 break; 7705 } 7706 } 7707 /* fallback to MPIAIJ ops */ 7708 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7709 PetscFunctionReturn(PETSC_SUCCESS); 7710 } 7711 7712 /* 7713 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7714 7715 n - the number of block indices in cc[] 7716 cc - the block indices (must be large enough to contain the indices) 7717 */ 7718 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7719 { 7720 PetscInt cnt = -1, nidx, j; 7721 const PetscInt *idx; 7722 7723 PetscFunctionBegin; 7724 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7725 if (nidx) { 7726 cnt = 0; 7727 cc[cnt] = idx[0] / bs; 7728 for (j = 1; j < nidx; j++) { 7729 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7730 } 7731 } 7732 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7733 *n = cnt + 1; 7734 PetscFunctionReturn(PETSC_SUCCESS); 7735 } 7736 7737 /* 7738 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7739 7740 ncollapsed - the number of block indices 7741 collapsed - the block indices (must be large enough to contain the indices) 7742 */ 7743 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7744 { 7745 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7746 7747 PetscFunctionBegin; 7748 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7749 for (i = start + 1; i < start + bs; i++) { 7750 
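    /* collapse each remaining row of the block row and merge its block column indices into the running result, ping-ponging between the cprev and merged work arrays */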
PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7751 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7752 cprevtmp = cprev; 7753 cprev = merged; 7754 merged = cprevtmp; 7755 } 7756 *ncollapsed = nprev; 7757 if (collapsed) *collapsed = cprev; 7758 PetscFunctionReturn(PETSC_SUCCESS); 7759 } 7760 7761 /* 7762 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix 7763 7764 Input Parameters: 7765 + Amat - matrix 7766 . symmetrize - make the result symmetric 7767 . scale - scale with the diagonal . filter - if nonnegative, filter out graph entries with magnitude below this value (a negative value keeps all entries) . index_size - size of index[] - index - if index_size > 0, the within-block row/column indices used to form each block's scalar value (otherwise all bs rows/columns of the block are used) 7768 7769 Output Parameter: 7770 . a_Gmat - output scalar graph with entries >= 0 7771 7772 */ 7773 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7774 { 7775 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7776 MPI_Comm comm; 7777 Mat Gmat; 7778 PetscBool ismpiaij, isseqaij; 7779 Mat a, b, c; 7780 MatType jtype; 7781 7782 PetscFunctionBegin; 7783 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7784 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7785 PetscCall(MatGetSize(Amat, &MM, &NN)); 7786 PetscCall(MatGetBlockSize(Amat, &bs)); 7787 nloc = (Iend - Istart) / bs; 7788 7789 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7790 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7791 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7792 7793 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7794 /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can provide a fast 7795 implementation */ 7796 if (bs > 1) { 7797 PetscCall(MatGetType(Amat, &jtype)); 7798 PetscCall(MatCreate(comm, &Gmat)); 7799 PetscCall(MatSetType(Gmat, jtype)); 7800 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7801 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7802 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7803 PetscInt *d_nnz, *o_nnz; 7804 MatScalar *aa, val, *AA; 7805 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7806 7807 if (isseqaij) { 7808 a = Amat; 7809 b = NULL; 7810 } else { 7811 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7812 a = d->A; 7813 b = d->B; 7814 } 7815 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7816 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7817 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7818 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7819 const PetscInt *cols1, *cols2; 7820 7821 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7822 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7823 nnz[brow / bs] = nc2 / bs; 7824 if (nc2 % bs) ok = 0; 7825 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7826 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7827 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7828 if (nc1 != nc2) ok = 0; 7829 else { 7830 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7831 if (cols1[jj] != cols2[jj]) ok = 0; 7832 if (cols1[jj] % bs != jj % bs) ok = 0; 7833 } 7834 } 7835 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7836 } 7837 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7838 if (!ok) { 7839 PetscCall(PetscFree2(d_nnz, o_nnz)); 7840 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7841 goto old_bs; 7842 } 7843 } 7844 } 7845 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7846 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7847 PetscCall(PetscFree2(d_nnz, o_nnz)); 7848 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7849 // diag 7850 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7851 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7852 7853 ai = aseq->i; 7854 n = ai[brow + 1] - ai[brow]; 7855 aj = aseq->j + ai[brow]; 7856 for (PetscInt k = 0; k < n; k += bs) { // block columns 7857 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7858 val = 0; 7859 if (index_size == 0) { 7860 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7861 aa = aseq->a + ai[brow + ii] + k; 7862 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7863 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7864 } 7865 } 7866 } else { // use (index,index) value if provided 7867 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7868 PetscInt ii = index[iii]; 7869 aa = aseq->a + ai[brow + ii] + k; 7870 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7871 PetscInt jj = index[jjj]; 7872 val += PetscAbs(PetscRealPart(aa[jj])); 7873 } 7874 } 7875 } 7876 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7877 AA[k / bs] = val; 7878 } 7879 grow = Istart / bs + brow / bs; 7880 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7881 } 7882 // off-diag 7883 if (ismpiaij) { 7884 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7885 const PetscScalar *vals; 7886 const PetscInt *cols, *garray = aij->garray; 7887 7888 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7889 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7890 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7891 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7892 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7893 AA[k / bs] = 0; 7894 AJ[cidx] = garray[cols[k]] / bs; 7895 } 7896 nc = ncols / bs; 7897 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7898 if (index_size == 0) { 7899 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7900 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7901 for (PetscInt k = 0; k < ncols; k += bs) { 7902 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7903 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7904 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7905 } 7906 } 7907 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7908 } 7909 } else { // use (index,index) value if provided 7910 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7911 PetscInt ii = index[iii]; 7912 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7913 for (PetscInt k = 0; k < ncols; k += bs) { 7914 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7915 PetscInt jj = index[jjj]; 7916 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7917 } 7918 } 7919 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7920 } 7921 } 7922 grow = Istart / bs + brow / bs; 7923 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7924 } 7925 } 7926 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7927 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7928 PetscCall(PetscFree2(AA, AJ)); 7929 } else { 7930 const PetscScalar *vals; 7931 const PetscInt *idx; 7932 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7933 old_bs: 7934 /* 7935 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7936 */ 7937 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7938 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7939 if (isseqaij) { 7940 PetscInt max_d_nnz; 7941 7942 /* 7943 Determine exact preallocation count for (sequential) scalar matrix 7944 */ 7945 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7946 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7947 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7948 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7949 PetscCall(PetscFree3(w0, w1, w2)); 7950 } else if (ismpiaij) { 7951 Mat Daij, Oaij; 7952 const PetscInt *garray; 7953 PetscInt max_d_nnz; 7954 7955 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7956 /* 7957 Determine exact preallocation count for diagonal block portion of scalar matrix 7958 */ 7959 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7960 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7961 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7962 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7963 PetscCall(PetscFree3(w0, w1, w2)); 7964 /* 7965 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7966 */ 7967 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7968 o_nnz[jj] = 0; 7969 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7970 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7971 o_nnz[jj] += ncols; 7972 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7973 } 7974 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7975 } 7976 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7977 /* get scalar copy (norms) of matrix */ 7978 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7979 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7980 PetscCall(PetscFree2(d_nnz, o_nnz)); 7981 for (Ii = Istart; Ii < Iend; Ii++) { 7982 PetscInt dest_row = Ii / bs; 7983 7984 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7985 for (jj = 0; jj < ncols; jj++) { 7986 PetscInt dest_col = idx[jj] / bs; 7987 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7988 7989 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7990 } 7991 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7992 } 7993 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7994 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7995 } 7996 } else { 7997 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7998 else { 7999 Gmat = Amat; 8000 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8001 } 8002 if (isseqaij) { 8003 a = Gmat; 8004 b = NULL; 8005 } else { 8006 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8007 a = d->A; 8008 b = d->B; 8009 } 8010 if (filter >= 0 || scale) { 8011 /* take absolute value of each entry */ 8012 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8013 MatInfo info; 8014 PetscScalar *avals; 8015 8016 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8017 PetscCall(MatSeqAIJGetArray(c, &avals)); 8018 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8019 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8020 } 8021 } 8022 } 8023 if (symmetrize) { 8024 PetscBool isset, issym; 8025 8026 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8027 if (!isset || !issym) { 8028 Mat matTrans; 8029 8030 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8031 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8032 PetscCall(MatDestroy(&matTrans)); 8033 } 8034 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8035 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8036 if (scale) { 8037 /* scale c for all diagonal values = 1 or -1 */ 8038 Vec diag; 8039 8040 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8041 PetscCall(MatGetDiagonal(Gmat, diag)); 8042 PetscCall(VecReciprocal(diag)); 8043 PetscCall(VecSqrtAbs(diag)); 8044 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8045 PetscCall(VecDestroy(&diag)); 8046 } 8047 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8048 if (filter >= 0) { 8049 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8050 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8051 } 8052 *a_Gmat = Gmat; 8053 PetscFunctionReturn(PETSC_SUCCESS); 8054 } 8055 8056 /* 8057 Special version for direct calls from Fortran 8058 */ 8059 8060 /* Change these macros so can be used in void function */ 8061 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8062 #undef PetscCall 8063 #define PetscCall(...) \ 8064 do { \ 8065 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8066 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8067 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8068 return; \ 8069 } \ 8070 } while (0) 8071 8072 #undef SETERRQ 8073 #define SETERRQ(comm, ierr, ...) 
\ 8074 do { \ 8075 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8076 return; \ 8077 } while (0) 8078 8079 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8080 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8081 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8082 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8083 #else 8084 #endif 8085 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8086 { 8087 Mat mat = *mmat; 8088 PetscInt m = *mm, n = *mn; 8089 InsertMode addv = *maddv; 8090 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8091 PetscScalar value; 8092 8093 MatCheckPreallocated(mat, 1); 8094 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8095 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8096 { 8097 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8098 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8099 PetscBool roworiented = aij->roworiented; 8100 8101 /* Some Variables required in the macro */ 8102 Mat A = aij->A; 8103 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8104 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8105 MatScalar *aa; 8106 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8107 Mat B = aij->B; 8108 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8109 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8110 MatScalar *ba; 8111 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8112 * cannot use "#if defined" inside a macro. 
*/ 8113 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8114 8115 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8116 PetscInt nonew = a->nonew; 8117 MatScalar *ap1, *ap2; 8118 8119 PetscFunctionBegin; 8120 PetscCall(MatSeqAIJGetArray(A, &aa)); 8121 PetscCall(MatSeqAIJGetArray(B, &ba)); 8122 for (i = 0; i < m; i++) { 8123 if (im[i] < 0) continue; 8124 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8125 if (im[i] >= rstart && im[i] < rend) { 8126 row = im[i] - rstart; 8127 lastcol1 = -1; 8128 rp1 = aj + ai[row]; 8129 ap1 = aa + ai[row]; 8130 rmax1 = aimax[row]; 8131 nrow1 = ailen[row]; 8132 low1 = 0; 8133 high1 = nrow1; 8134 lastcol2 = -1; 8135 rp2 = bj + bi[row]; 8136 ap2 = ba + bi[row]; 8137 rmax2 = bimax[row]; 8138 nrow2 = bilen[row]; 8139 low2 = 0; 8140 high2 = nrow2; 8141 8142 for (j = 0; j < n; j++) { 8143 if (roworiented) value = v[i * n + j]; 8144 else value = v[i + j * m]; 8145 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8146 if (in[j] >= cstart && in[j] < cend) { 8147 col = in[j] - cstart; 8148 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8149 } else if (in[j] < 0) continue; 8150 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8151 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8152 } else { 8153 if (mat->was_assembled) { 8154 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8155 #if defined(PETSC_USE_CTABLE) 8156 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8157 col--; 8158 #else 8159 col = aij->colmap[in[j]] - 1; 8160 #endif 8161 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8162 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8163 col = in[j]; 8164 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8165 B = aij->B; 8166 b = (Mat_SeqAIJ *)B->data; 8167 bimax = b->imax; 8168 bi = b->i; 8169 bilen = b->ilen; 8170 bj = b->j; 8171 rp2 = bj + bi[row]; 8172 ap2 = ba + bi[row]; 8173 rmax2 = bimax[row]; 8174 nrow2 = bilen[row]; 8175 low2 = 0; 8176 high2 = nrow2; 8177 bm = aij->B->rmap->n; 8178 ba = b->a; 8179 inserted = PETSC_FALSE; 8180 } 8181 } else col = in[j]; 8182 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8183 } 8184 } 8185 } else if (!aij->donotstash) { 8186 if (roworiented) { 8187 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8188 } else { 8189 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8190 } 8191 } 8192 } 8193 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8194 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8195 } 8196 PetscFunctionReturnVoid(); 8197 } 8198 8199 /* Undefining these here since they were redefined from their original definition above! No 8200 * other PETSc functions should be defined past this point, as it is impossible to recover the 8201 * original definitions */ 8202 #undef PetscCall 8203 #undef SETERRQ 8204
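/*
   Example (for illustration only; the Fortran-side declaration is not part of this file): the preprocessor mapping
   above adapts the symbol matsetvaluesmpiaij_ to the compiler's Fortran name mangling, so a Fortran caller can bypass
   the generic MatSetValues() dispatch and invoke the MPIAIJ kernel directly, e.g.

      call MatSetValuesMPIAIJ(mat, m, rows, n, cols, vals, ADD_VALUES, ierr)

   with the same argument meanings as MatSetValues(); the matrix must already be preallocated and of type MATMPIAIJ.
*/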