1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`; the type also automatically
   switches over to use inodes when enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 
PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ 
*)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 PetscMPIInt in; 288 289 PetscFunctionBegin; 290 PetscCall(MatGetSize(A, &m, &n)); 291 PetscCall(PetscCalloc1(n, &work)); 292 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 294 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 295 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 296 if (type == NORM_2) { 297 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 298 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 299 } else if (type == NORM_1) { 300 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 301 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 302 } else if (type == NORM_INFINITY) { 303 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 304 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = 
PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 305 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 306 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 307 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 308 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 309 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 310 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 311 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 312 PetscCall(PetscMPIIntCast(n, &in)); 313 if (type == NORM_INFINITY) { 314 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 315 } else { 316 PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 317 } 318 PetscCall(PetscFree(work)); 319 if (type == NORM_2) { 320 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 321 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 322 for (i = 0; i < n; i++) reductions[i] /= m; 323 } 324 PetscFunctionReturn(PETSC_SUCCESS); 325 } 326 327 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 328 { 329 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 330 IS sis, gis; 331 const PetscInt *isis, *igis; 332 PetscInt n, *iis, nsis, ngis, rstart, i; 333 334 PetscFunctionBegin; 335 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 336 PetscCall(MatFindNonzeroRows(a->B, &gis)); 337 PetscCall(ISGetSize(gis, &ngis)); 338 PetscCall(ISGetSize(sis, &nsis)); 339 PetscCall(ISGetIndices(sis, &isis)); 340 PetscCall(ISGetIndices(gis, &igis)); 341 342 PetscCall(PetscMalloc1(ngis 
+ nsis, &iis)); 343 PetscCall(PetscArraycpy(iis, igis, ngis)); 344 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 345 n = ngis + nsis; 346 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 347 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 348 for (i = 0; i < n; i++) iis[i] += rstart; 349 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 350 351 PetscCall(ISRestoreIndices(sis, &isis)); 352 PetscCall(ISRestoreIndices(gis, &igis)); 353 PetscCall(ISDestroy(&sis)); 354 PetscCall(ISDestroy(&gis)); 355 PetscFunctionReturn(PETSC_SUCCESS); 356 } 357 358 /* 359 Local utility routine that creates a mapping from the global column 360 number to the local number in the off-diagonal part of the local 361 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 362 a slightly higher hash table cost; without it it is not scalable (each processor 363 has an order N integer array but is fast to access. 364 */ 365 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 366 { 367 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 368 PetscInt n = aij->B->cmap->n, i; 369 370 PetscFunctionBegin; 371 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 372 #if defined(PETSC_USE_CTABLE) 373 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 374 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 375 #else 376 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 377 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 378 #endif 379 PetscFunctionReturn(PETSC_SUCCESS); 380 } 381 382 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 383 do { \ 384 if (col <= lastcol1) low1 = 0; \ 385 else high1 = nrow1; \ 386 lastcol1 = col; \ 387 while (high1 - low1 > 5) { \ 388 t = (low1 + high1) / 2; \ 389 if (rp1[t] > col) high1 = t; \ 390 else low1 = t; \ 391 } \ 392 for (_i = low1; _i < high1; _i++) { \ 393 if 
(rp1[_i] > col) break; \ 394 if (rp1[_i] == col) { \ 395 if (addv == ADD_VALUES) { \ 396 ap1[_i] += value; \ 397 /* Not sure LogFlops will slow dow the code or not */ \ 398 (void)PetscLogFlops(1.0); \ 399 } else ap1[_i] = value; \ 400 goto a_noinsert; \ 401 } \ 402 } \ 403 if (value == 0.0 && ignorezeroentries && row != col) { \ 404 low1 = 0; \ 405 high1 = nrow1; \ 406 goto a_noinsert; \ 407 } \ 408 if (nonew == 1) { \ 409 low1 = 0; \ 410 high1 = nrow1; \ 411 goto a_noinsert; \ 412 } \ 413 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 414 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 415 N = nrow1++ - 1; \ 416 a->nz++; \ 417 high1++; \ 418 /* shift up all the later entries in this row */ \ 419 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 420 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 421 rp1[_i] = col; \ 422 ap1[_i] = value; \ 423 a_noinsert:; \ 424 ailen[row] = nrow1; \ 425 } while (0) 426 427 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 428 do { \ 429 if (col <= lastcol2) low2 = 0; \ 430 else high2 = nrow2; \ 431 lastcol2 = col; \ 432 while (high2 - low2 > 5) { \ 433 t = (low2 + high2) / 2; \ 434 if (rp2[t] > col) high2 = t; \ 435 else low2 = t; \ 436 } \ 437 for (_i = low2; _i < high2; _i++) { \ 438 if (rp2[_i] > col) break; \ 439 if (rp2[_i] == col) { \ 440 if (addv == ADD_VALUES) { \ 441 ap2[_i] += value; \ 442 (void)PetscLogFlops(1.0); \ 443 } else ap2[_i] = value; \ 444 goto b_noinsert; \ 445 } \ 446 } \ 447 if (value == 0.0 && ignorezeroentries) { \ 448 low2 = 0; \ 449 high2 = nrow2; \ 450 goto b_noinsert; \ 451 } \ 452 if (nonew == 1) { \ 453 low2 = 0; \ 454 high2 = nrow2; \ 455 goto b_noinsert; \ 456 } \ 457 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new 
nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 458 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 459 N = nrow2++ - 1; \ 460 b->nz++; \ 461 high2++; \ 462 /* shift up all the later entries in this row */ \ 463 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 464 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 465 rp2[_i] = col; \ 466 ap2[_i] = value; \ 467 b_noinsert:; \ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, 
PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 584 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], 
in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. 
*/ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, 
PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt 
row = lrows[r] + A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the 
last p owns this row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") 
and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = 
Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial 
results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 
header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if (hnz > PETSC_INT_MAX) header[3] = PETSC_INT_MAX; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 
PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 
PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" 
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 
1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 
PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 
PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 
1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 
case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 
1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if (idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 PetscMPIInt iN; 1804 1805 PetscFunctionBegin; 1806 if (aij->size == 1) { 1807 PetscCall(MatNorm(aij->A, type, norm)); 1808 } else { 1809 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1810 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1811 if (type == NORM_FROBENIUS) { 1812 v = amata; 1813 for (i = 0; i < amat->nz; i++) { 1814 sum += PetscRealPart(PetscConj(*v) * (*v)); 1815 v++; 1816 } 1817 v = bmata; 1818 for (i = 0; i < bmat->nz; i++) { 1819 sum += PetscRealPart(PetscConj(*v) * (*v)); 1820 v++; 1821 } 1822 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1823 *norm = PetscSqrtReal(*norm); 1824 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1825 } else if (type == NORM_1) { /* max column norm */ 1826 PetscReal *tmp, *tmp2; 1827 PetscInt *jj, *garray = aij->garray; 1828 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1829 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1830 *norm = 0.0; 1831 v = amata; 1832 jj = amat->j; 1833 for (j = 0; j < amat->nz; j++) { 1834 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1835 v++; 1836 } 1837 v = bmata; 1838 jj = bmat->j; 1839 for (j = 0; j < bmat->nz; 
j++) { 1840 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1841 v++; 1842 } 1843 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1844 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1845 for (j = 0; j < mat->cmap->N; j++) { 1846 if (tmp2[j] > *norm) *norm = tmp2[j]; 1847 } 1848 PetscCall(PetscFree(tmp)); 1849 PetscCall(PetscFree(tmp2)); 1850 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1851 } else if (type == NORM_INFINITY) { /* max row norm */ 1852 PetscReal ntemp = 0.0; 1853 for (j = 0; j < aij->A->rmap->n; j++) { 1854 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1855 sum = 0.0; 1856 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1857 sum += PetscAbsScalar(*v); 1858 v++; 1859 } 1860 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1861 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1862 sum += PetscAbsScalar(*v); 1863 v++; 1864 } 1865 if (sum > ntemp) ntemp = sum; 1866 } 1867 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1868 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1869 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1870 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1871 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1872 } 1873 PetscFunctionReturn(PETSC_SUCCESS); 1874 } 1875 1876 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1877 { 1878 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1879 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1880 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1881 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1882 Mat B, A_diag, *B_diag; 1883 const MatScalar *pbv, *bv; 1884 1885 PetscFunctionBegin; 1886 if (reuse == MAT_REUSE_MATRIX) 
PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1887 ma = A->rmap->n; 1888 na = A->cmap->n; 1889 mb = a->B->rmap->n; 1890 nb = a->B->cmap->n; 1891 ai = Aloc->i; 1892 aj = Aloc->j; 1893 bi = Bloc->i; 1894 bj = Bloc->j; 1895 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1896 PetscInt *d_nnz, *g_nnz, *o_nnz; 1897 PetscSFNode *oloc; 1898 PETSC_UNUSED PetscSF sf; 1899 1900 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1901 /* compute d_nnz for preallocation */ 1902 PetscCall(PetscArrayzero(d_nnz, na)); 1903 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1904 /* compute local off-diagonal contributions */ 1905 PetscCall(PetscArrayzero(g_nnz, nb)); 1906 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1907 /* map those to global */ 1908 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1909 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1910 PetscCall(PetscSFSetFromOptions(sf)); 1911 PetscCall(PetscArrayzero(o_nnz, na)); 1912 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1913 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1914 PetscCall(PetscSFDestroy(&sf)); 1915 1916 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1917 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1918 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1919 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1920 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1921 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1922 } else { 1923 B = *matout; 1924 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1925 } 1926 1927 b = (Mat_MPIAIJ *)B->data; 1928 A_diag = a->A; 1929 B_diag = &b->A; 1930 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1931 A_diag_ncol = A_diag->cmap->N; 1932 B_diag_ilen = sub_B_diag->ilen; 1933 B_diag_i = sub_B_diag->i; 1934 1935 /* Set ilen for diagonal of B */ 1936 for (i 
= 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1937 1938 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1939 very quickly (=without using MatSetValues), because all writes are local. */ 1940 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1941 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1942 1943 /* copy over the B part */ 1944 PetscCall(PetscMalloc1(bi[mb], &cols)); 1945 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1946 pbv = bv; 1947 row = A->rmap->rstart; 1948 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1949 cols_tmp = cols; 1950 for (i = 0; i < mb; i++) { 1951 ncol = bi[i + 1] - bi[i]; 1952 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1953 row++; 1954 if (pbv) pbv += ncol; 1955 if (cols_tmp) cols_tmp += ncol; 1956 } 1957 PetscCall(PetscFree(cols)); 1958 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1959 1960 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1961 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1962 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1963 *matout = B; 1964 } else { 1965 PetscCall(MatHeaderMerge(A, &B)); 1966 } 1967 PetscFunctionReturn(PETSC_SUCCESS); 1968 } 1969 1970 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1971 { 1972 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1973 Mat a = aij->A, b = aij->B; 1974 PetscInt s1, s2, s3; 1975 1976 PetscFunctionBegin; 1977 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1978 if (rr) { 1979 PetscCall(VecGetLocalSize(rr, &s1)); 1980 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1981 /* Overlap communication with computation. 
*/ 1982 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1983 } 1984 if (ll) { 1985 PetscCall(VecGetLocalSize(ll, &s1)); 1986 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1987 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1988 } 1989 /* scale the diagonal block */ 1990 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1991 1992 if (rr) { 1993 /* Do a scatter end and then right scale the off-diagonal block */ 1994 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1995 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1996 } 1997 PetscFunctionReturn(PETSC_SUCCESS); 1998 } 1999 2000 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2001 { 2002 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2003 2004 PetscFunctionBegin; 2005 PetscCall(MatSetUnfactored(a->A)); 2006 PetscFunctionReturn(PETSC_SUCCESS); 2007 } 2008 2009 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2010 { 2011 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2012 Mat a, b, c, d; 2013 PetscBool flg; 2014 2015 PetscFunctionBegin; 2016 a = matA->A; 2017 b = matA->B; 2018 c = matB->A; 2019 d = matB->B; 2020 2021 PetscCall(MatEqual(a, c, &flg)); 2022 if (flg) PetscCall(MatEqual(b, d, &flg)); 2023 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2024 PetscFunctionReturn(PETSC_SUCCESS); 2025 } 2026 2027 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2028 { 2029 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2030 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2031 2032 PetscFunctionBegin; 2033 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2034 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2035 /* because of the column compression in the off-processor part of the matrix a->B, 2036 the number of columns in a->B and b->B may be different, hence we cannot call 2037 the MatCopy() directly on the two parts. If need be, we can provide a more 2038 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2039 then copying the submatrices */ 2040 PetscCall(MatCopy_Basic(A, B, str)); 2041 } else { 2042 PetscCall(MatCopy(a->A, b->A, str)); 2043 PetscCall(MatCopy(a->B, b->B, str)); 2044 } 2045 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2046 PetscFunctionReturn(PETSC_SUCCESS); 2047 } 2048 2049 /* 2050 Computes the number of nonzeros per row needed for preallocation when X and Y 2051 have different nonzero structure. 2052 */ 2053 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2054 { 2055 PetscInt i, j, k, nzx, nzy; 2056 2057 PetscFunctionBegin; 2058 /* Set the number of nonzeros in the new matrix */ 2059 for (i = 0; i < m; i++) { 2060 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2061 nzx = xi[i + 1] - xi[i]; 2062 nzy = yi[i + 1] - yi[i]; 2063 nnz[i] = 0; 2064 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2065 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2066 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2067 nnz[i]++; 2068 } 2069 for (; k < nzy; k++) nnz[i]++; 2070 } 2071 PetscFunctionReturn(PETSC_SUCCESS); 2072 } 2073 2074 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2075 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2076 { 
2077 PetscInt m = Y->rmap->N; 2078 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2079 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2080 2081 PetscFunctionBegin; 2082 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2087 { 2088 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2089 2090 PetscFunctionBegin; 2091 if (str == SAME_NONZERO_PATTERN) { 2092 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2093 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2094 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2095 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2096 } else { 2097 Mat B; 2098 PetscInt *nnz_d, *nnz_o; 2099 2100 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2101 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2102 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2103 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2104 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2105 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2106 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2107 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2108 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2109 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2110 PetscCall(MatHeaderMerge(Y, &B)); 2111 PetscCall(PetscFree(nnz_d)); 2112 PetscCall(PetscFree(nnz_o)); 2113 } 2114 PetscFunctionReturn(PETSC_SUCCESS); 2115 } 2116 2117 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2118 2119 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2120 { 2121 PetscFunctionBegin; 2122 if (PetscDefined(USE_COMPLEX)) { 2123 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2124 2125 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2126 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2127 } 2128 
PetscFunctionReturn(PETSC_SUCCESS); 2129 } 2130 2131 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2132 { 2133 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2134 2135 PetscFunctionBegin; 2136 PetscCall(MatRealPart(a->A)); 2137 PetscCall(MatRealPart(a->B)); 2138 PetscFunctionReturn(PETSC_SUCCESS); 2139 } 2140 2141 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2142 { 2143 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2144 2145 PetscFunctionBegin; 2146 PetscCall(MatImaginaryPart(a->A)); 2147 PetscCall(MatImaginaryPart(a->B)); 2148 PetscFunctionReturn(PETSC_SUCCESS); 2149 } 2150 2151 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2152 { 2153 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2154 PetscInt i, *idxb = NULL, m = A->rmap->n; 2155 PetscScalar *vv; 2156 Vec vB, vA; 2157 const PetscScalar *va, *vb; 2158 2159 PetscFunctionBegin; 2160 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2161 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2162 2163 PetscCall(VecGetArrayRead(vA, &va)); 2164 if (idx) { 2165 for (i = 0; i < m; i++) { 2166 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2167 } 2168 } 2169 2170 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2171 PetscCall(PetscMalloc1(m, &idxb)); 2172 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2173 2174 PetscCall(VecGetArrayWrite(v, &vv)); 2175 PetscCall(VecGetArrayRead(vB, &vb)); 2176 for (i = 0; i < m; i++) { 2177 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2178 vv[i] = vb[i]; 2179 if (idx) idx[i] = a->garray[idxb[i]]; 2180 } else { 2181 vv[i] = va[i]; 2182 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2183 } 2184 } 2185 PetscCall(VecRestoreArrayWrite(v, &vv)); 2186 PetscCall(VecRestoreArrayRead(vA, &va)); 2187 PetscCall(VecRestoreArrayRead(vB, &vb)); 2188 PetscCall(PetscFree(idxb)); 2189 PetscCall(VecDestroy(&vA)); 2190 PetscCall(VecDestroy(&vB)); 2191 PetscFunctionReturn(PETSC_SUCCESS); 2192 } 2193 2194 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2195 { 2196 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2197 Vec vB, vA; 2198 2199 PetscFunctionBegin; 2200 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2201 PetscCall(MatGetRowSumAbs(a->A, vA)); 2202 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2203 PetscCall(MatGetRowSumAbs(a->B, vB)); 2204 PetscCall(VecAXPY(vA, 1.0, vB)); 2205 PetscCall(VecDestroy(&vB)); 2206 PetscCall(VecCopy(vA, v)); 2207 PetscCall(VecDestroy(&vA)); 2208 PetscFunctionReturn(PETSC_SUCCESS); 2209 } 2210 2211 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2212 { 2213 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2214 PetscInt m = A->rmap->n, n = A->cmap->n; 2215 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2216 PetscInt *cmap = mat->garray; 2217 PetscInt *diagIdx, *offdiagIdx; 2218 Vec diagV, offdiagV; 2219 PetscScalar *a, *diagA, *offdiagA; 2220 const PetscScalar *ba, *bav; 2221 PetscInt r, j, col, ncols, *bi, *bj; 2222 Mat B = mat->B; 2223 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2224 2225 PetscFunctionBegin; 2226 /* When a process holds entire A and other processes have no entry */ 2227 if (A->cmap->N == n) { 2228 PetscCall(VecGetArrayWrite(v, &diagA)); 2229 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2230 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2231 PetscCall(VecDestroy(&diagV)); 2232 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2233 PetscFunctionReturn(PETSC_SUCCESS); 2234 } else if (n == 0) { 2235 if (m) { 2236 PetscCall(VecGetArrayWrite(v, &a)); 2237 for (r = 0; r < m; r++) { 2238 a[r] = 0.0; 2239 if (idx) idx[r] = -1; 2240 } 2241 PetscCall(VecRestoreArrayWrite(v, &a)); 2242 } 2243 PetscFunctionReturn(PETSC_SUCCESS); 2244 } 2245 2246 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2248 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2250 2251 /* Get 
offdiagIdx[] for implicit 0.0 */ 2252 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2253 ba = bav; 2254 bi = b->i; 2255 bj = b->j; 2256 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2257 for (r = 0; r < m; r++) { 2258 ncols = bi[r + 1] - bi[r]; 2259 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2260 offdiagA[r] = *ba; 2261 offdiagIdx[r] = cmap[0]; 2262 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2263 offdiagA[r] = 0.0; 2264 2265 /* Find first hole in the cmap */ 2266 for (j = 0; j < ncols; j++) { 2267 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2268 if (col > j && j < cstart) { 2269 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2270 break; 2271 } else if (col > j + n && j >= cstart) { 2272 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2273 break; 2274 } 2275 } 2276 if (j == ncols && ncols < A->cmap->N - n) { 2277 /* a hole is outside compressed Bcols */ 2278 if (ncols == 0) { 2279 if (cstart) { 2280 offdiagIdx[r] = 0; 2281 } else offdiagIdx[r] = cend; 2282 } else { /* ncols > 0 */ 2283 offdiagIdx[r] = cmap[ncols - 1] + 1; 2284 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2285 } 2286 } 2287 } 2288 2289 for (j = 0; j < ncols; j++) { 2290 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2291 offdiagA[r] = *ba; 2292 offdiagIdx[r] = cmap[*bj]; 2293 } 2294 ba++; 2295 bj++; 2296 } 2297 } 2298 2299 PetscCall(VecGetArrayWrite(v, &a)); 2300 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2301 for (r = 0; r < m; ++r) { 2302 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2303 a[r] = diagA[r]; 2304 if (idx) idx[r] = cstart + diagIdx[r]; 2305 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2306 a[r] = diagA[r]; 2307 if (idx) { 2308 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2309 idx[r] = cstart + diagIdx[r]; 2310 } else idx[r] = offdiagIdx[r]; 2311 } 2312 } else { 2313 a[r] = offdiagA[r]; 2314 if (idx) 
idx[r] = offdiagIdx[r]; 2315 } 2316 } 2317 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2318 PetscCall(VecRestoreArrayWrite(v, &a)); 2319 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2320 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2321 PetscCall(VecDestroy(&diagV)); 2322 PetscCall(VecDestroy(&offdiagV)); 2323 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2324 PetscFunctionReturn(PETSC_SUCCESS); 2325 } 2326 2327 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2328 { 2329 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2330 PetscInt m = A->rmap->n, n = A->cmap->n; 2331 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2332 PetscInt *cmap = mat->garray; 2333 PetscInt *diagIdx, *offdiagIdx; 2334 Vec diagV, offdiagV; 2335 PetscScalar *a, *diagA, *offdiagA; 2336 const PetscScalar *ba, *bav; 2337 PetscInt r, j, col, ncols, *bi, *bj; 2338 Mat B = mat->B; 2339 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2340 2341 PetscFunctionBegin; 2342 /* When a process holds entire A and other processes have no entry */ 2343 if (A->cmap->N == n) { 2344 PetscCall(VecGetArrayWrite(v, &diagA)); 2345 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2346 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2347 PetscCall(VecDestroy(&diagV)); 2348 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2349 PetscFunctionReturn(PETSC_SUCCESS); 2350 } else if (n == 0) { 2351 if (m) { 2352 PetscCall(VecGetArrayWrite(v, &a)); 2353 for (r = 0; r < m; r++) { 2354 a[r] = PETSC_MAX_REAL; 2355 if (idx) idx[r] = -1; 2356 } 2357 PetscCall(VecRestoreArrayWrite(v, &a)); 2358 } 2359 PetscFunctionReturn(PETSC_SUCCESS); 2360 } 2361 2362 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2363 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2364 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2365 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2366 2367 /* Get offdiagIdx[] for implicit 0.0 */ 2368 PetscCall(MatSeqAIJGetArrayRead(B, 
&bav)); 2369 ba = bav; 2370 bi = b->i; 2371 bj = b->j; 2372 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2373 for (r = 0; r < m; r++) { 2374 ncols = bi[r + 1] - bi[r]; 2375 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2376 offdiagA[r] = *ba; 2377 offdiagIdx[r] = cmap[0]; 2378 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2379 offdiagA[r] = 0.0; 2380 2381 /* Find first hole in the cmap */ 2382 for (j = 0; j < ncols; j++) { 2383 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2384 if (col > j && j < cstart) { 2385 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2386 break; 2387 } else if (col > j + n && j >= cstart) { 2388 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2389 break; 2390 } 2391 } 2392 if (j == ncols && ncols < A->cmap->N - n) { 2393 /* a hole is outside compressed Bcols */ 2394 if (ncols == 0) { 2395 if (cstart) { 2396 offdiagIdx[r] = 0; 2397 } else offdiagIdx[r] = cend; 2398 } else { /* ncols > 0 */ 2399 offdiagIdx[r] = cmap[ncols - 1] + 1; 2400 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2401 } 2402 } 2403 } 2404 2405 for (j = 0; j < ncols; j++) { 2406 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2407 offdiagA[r] = *ba; 2408 offdiagIdx[r] = cmap[*bj]; 2409 } 2410 ba++; 2411 bj++; 2412 } 2413 } 2414 2415 PetscCall(VecGetArrayWrite(v, &a)); 2416 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2417 for (r = 0; r < m; ++r) { 2418 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) idx[r] = cstart + diagIdx[r]; 2421 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2422 a[r] = diagA[r]; 2423 if (idx) { 2424 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2425 idx[r] = cstart + diagIdx[r]; 2426 } else idx[r] = offdiagIdx[r]; 2427 } 2428 } else { 2429 a[r] = offdiagA[r]; 2430 if (idx) idx[r] = offdiagIdx[r]; 2431 } 2432 } 2433 PetscCall(MatSeqAIJRestoreArrayRead(B, 
/*
  MatGetRowMax_MPIAIJ - For each locally owned row, stores in v the entry with the largest real part and,
  when idx is not NULL, the column index at which that entry occurs.

  The diagonal block (mat->A) is reduced with MatGetRowMax(); the off-diagonal block (mat->B) is scanned here.
  Columns of the off-diagonal part that are not stored count as implicit 0.0 entries, so for a sparse
  off-diagonal row the running maximum starts at 0.0 located at the first unstored column.
  When both parts give the same real part, the smaller column index is reported.
*/
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt m = A->rmap->n, n = A->cmap->n;
  PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt *cmap = mat->garray;
  PetscInt *diagIdx, *offdiagIdx;
  Vec diagV, offdiagV;
  PetscScalar *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt r, j, col, ncols, *bi, *bj;
  Mat B = mat->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* wrap the storage of v in a sequential vector so the diagonal block writes its result directly into v */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: report PETSC_MIN_REAL and index -1 for every local row */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r] = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this row; ba and bj advance through the whole matrix, row after row */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r] = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal and off-diagonal results; diagIdx is local to the diagonal block, hence the cstart shift */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2552 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2553 PetscCall(VecDestroy(&diagV)); 2554 PetscCall(VecDestroy(&offdiagV)); 2555 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2556 PetscFunctionReturn(PETSC_SUCCESS); 2557 } 2558 2559 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2560 { 2561 Mat *dummy; 2562 2563 PetscFunctionBegin; 2564 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2565 *newmat = *dummy; 2566 PetscCall(PetscFree(dummy)); 2567 PetscFunctionReturn(PETSC_SUCCESS); 2568 } 2569 2570 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2571 { 2572 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2573 2574 PetscFunctionBegin; 2575 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2576 A->factorerrortype = a->A->factorerrortype; 2577 PetscFunctionReturn(PETSC_SUCCESS); 2578 } 2579 2580 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2581 { 2582 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2583 2584 PetscFunctionBegin; 2585 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2586 PetscCall(MatSetRandom(aij->A, rctx)); 2587 if (x->assembled) { 2588 PetscCall(MatSetRandom(aij->B, rctx)); 2589 } else { 2590 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2591 } 2592 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2593 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2598 { 2599 PetscFunctionBegin; 2600 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2601 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2602 
PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 /*@ 2606 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2607 2608 Not Collective 2609 2610 Input Parameter: 2611 . A - the matrix 2612 2613 Output Parameter: 2614 . nz - the number of nonzeros 2615 2616 Level: advanced 2617 2618 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2619 @*/ 2620 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2621 { 2622 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2623 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2624 PetscBool isaij; 2625 2626 PetscFunctionBegin; 2627 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2628 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2629 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2630 PetscFunctionReturn(PETSC_SUCCESS); 2631 } 2632 2633 /*@ 2634 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2635 2636 Collective 2637 2638 Input Parameters: 2639 + A - the matrix 2640 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2641 2642 Level: advanced 2643 2644 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2645 @*/ 2646 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2647 { 2648 PetscFunctionBegin; 2649 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2650 PetscFunctionReturn(PETSC_SUCCESS); 2651 } 2652 2653 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2654 { 2655 PetscBool sc = PETSC_FALSE, flg; 2656 2657 PetscFunctionBegin; 2658 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2659 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2660 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use 
a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2661 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2662 PetscOptionsHeadEnd(); 2663 PetscFunctionReturn(PETSC_SUCCESS); 2664 } 2665 2666 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2667 { 2668 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2669 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2670 2671 PetscFunctionBegin; 2672 if (!Y->preallocated) { 2673 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2674 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2675 PetscInt nonew = aij->nonew; 2676 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2677 aij->nonew = nonew; 2678 } 2679 PetscCall(MatShift_Basic(Y, a)); 2680 PetscFunctionReturn(PETSC_SUCCESS); 2681 } 2682 2683 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2684 { 2685 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2686 2687 PetscFunctionBegin; 2688 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2689 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2690 if (d) { 2691 PetscInt rstart; 2692 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2693 *d += rstart; 2694 } 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2699 { 2700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2704 PetscFunctionReturn(PETSC_SUCCESS); 2705 } 2706 2707 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2708 { 2709 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2710 2711 PetscFunctionBegin; 2712 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 
2713 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2714 PetscFunctionReturn(PETSC_SUCCESS); 2715 } 2716 2717 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2718 MatGetRow_MPIAIJ, 2719 MatRestoreRow_MPIAIJ, 2720 MatMult_MPIAIJ, 2721 /* 4*/ MatMultAdd_MPIAIJ, 2722 MatMultTranspose_MPIAIJ, 2723 MatMultTransposeAdd_MPIAIJ, 2724 NULL, 2725 NULL, 2726 NULL, 2727 /*10*/ NULL, 2728 NULL, 2729 NULL, 2730 MatSOR_MPIAIJ, 2731 MatTranspose_MPIAIJ, 2732 /*15*/ MatGetInfo_MPIAIJ, 2733 MatEqual_MPIAIJ, 2734 MatGetDiagonal_MPIAIJ, 2735 MatDiagonalScale_MPIAIJ, 2736 MatNorm_MPIAIJ, 2737 /*20*/ MatAssemblyBegin_MPIAIJ, 2738 MatAssemblyEnd_MPIAIJ, 2739 MatSetOption_MPIAIJ, 2740 MatZeroEntries_MPIAIJ, 2741 /*24*/ MatZeroRows_MPIAIJ, 2742 NULL, 2743 NULL, 2744 NULL, 2745 NULL, 2746 /*29*/ MatSetUp_MPI_Hash, 2747 NULL, 2748 NULL, 2749 MatGetDiagonalBlock_MPIAIJ, 2750 NULL, 2751 /*34*/ MatDuplicate_MPIAIJ, 2752 NULL, 2753 NULL, 2754 NULL, 2755 NULL, 2756 /*39*/ MatAXPY_MPIAIJ, 2757 MatCreateSubMatrices_MPIAIJ, 2758 MatIncreaseOverlap_MPIAIJ, 2759 MatGetValues_MPIAIJ, 2760 MatCopy_MPIAIJ, 2761 /*44*/ MatGetRowMax_MPIAIJ, 2762 MatScale_MPIAIJ, 2763 MatShift_MPIAIJ, 2764 MatDiagonalSet_MPIAIJ, 2765 MatZeroRowsColumns_MPIAIJ, 2766 /*49*/ MatSetRandom_MPIAIJ, 2767 MatGetRowIJ_MPIAIJ, 2768 MatRestoreRowIJ_MPIAIJ, 2769 NULL, 2770 NULL, 2771 /*54*/ MatFDColoringCreate_MPIXAIJ, 2772 NULL, 2773 MatSetUnfactored_MPIAIJ, 2774 MatPermute_MPIAIJ, 2775 NULL, 2776 /*59*/ MatCreateSubMatrix_MPIAIJ, 2777 MatDestroy_MPIAIJ, 2778 MatView_MPIAIJ, 2779 NULL, 2780 NULL, 2781 /*64*/ NULL, 2782 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2783 NULL, 2784 NULL, 2785 NULL, 2786 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2787 MatGetRowMinAbs_MPIAIJ, 2788 NULL, 2789 NULL, 2790 NULL, 2791 NULL, 2792 /*75*/ MatFDColoringApply_AIJ, 2793 MatSetFromOptions_MPIAIJ, 2794 NULL, 2795 NULL, 2796 MatFindZeroDiagonals_MPIAIJ, 2797 /*80*/ NULL, 2798 NULL, 2799 NULL, 2800 /*83*/ 
MatLoad_MPIAIJ, 2801 NULL, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*89*/ NULL, 2807 NULL, 2808 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2809 NULL, 2810 NULL, 2811 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2812 NULL, 2813 NULL, 2814 NULL, 2815 MatBindToCPU_MPIAIJ, 2816 /*99*/ MatProductSetFromOptions_MPIAIJ, 2817 NULL, 2818 NULL, 2819 MatConjugate_MPIAIJ, 2820 NULL, 2821 /*104*/ MatSetValuesRow_MPIAIJ, 2822 MatRealPart_MPIAIJ, 2823 MatImaginaryPart_MPIAIJ, 2824 NULL, 2825 NULL, 2826 /*109*/ NULL, 2827 NULL, 2828 MatGetRowMin_MPIAIJ, 2829 NULL, 2830 MatMissingDiagonal_MPIAIJ, 2831 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2832 NULL, 2833 MatGetGhosts_MPIAIJ, 2834 NULL, 2835 NULL, 2836 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2837 NULL, 2838 NULL, 2839 NULL, 2840 MatGetMultiProcBlock_MPIAIJ, 2841 /*124*/ MatFindNonzeroRows_MPIAIJ, 2842 MatGetColumnReductions_MPIAIJ, 2843 MatInvertBlockDiagonal_MPIAIJ, 2844 MatInvertVariableBlockDiagonal_MPIAIJ, 2845 MatCreateSubMatricesMPI_MPIAIJ, 2846 /*129*/ NULL, 2847 NULL, 2848 NULL, 2849 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2850 NULL, 2851 /*134*/ NULL, 2852 NULL, 2853 NULL, 2854 NULL, 2855 NULL, 2856 /*139*/ MatSetBlockSizes_MPIAIJ, 2857 NULL, 2858 NULL, 2859 MatFDColoringSetUp_MPIXAIJ, 2860 MatFindOffBlockDiagonalEntries_MPIAIJ, 2861 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2862 /*145*/ NULL, 2863 NULL, 2864 NULL, 2865 MatCreateGraph_Simple_AIJ, 2866 NULL, 2867 /*150*/ NULL, 2868 MatEliminateZeros_MPIAIJ, 2869 MatGetRowSumAbs_MPIAIJ, 2870 NULL, 2871 NULL, 2872 NULL}; 2873 2874 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2875 { 2876 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2877 2878 PetscFunctionBegin; 2879 PetscCall(MatStoreValues(aij->A)); 2880 PetscCall(MatStoreValues(aij->B)); 2881 PetscFunctionReturn(PETSC_SUCCESS); 2882 } 2883 2884 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2885 { 2886 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2887 2888 PetscFunctionBegin; 2889 PetscCall(MatRetrieveValues(aij->A)); 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - (Re)creates the two sequential blocks of B and preallocates them.

  d_nz/d_nnz are forwarded to the diagonal block b->A and o_nz/o_nnz to the off-diagonal block b->B.
  Any existing column map, garray, local work vector and scatter are discarded, and B is left
  preallocated but unassembled.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* leave hash-based assembly mode: put back the operation table saved in b->cops */
  if (B->hash_active) {
    B->ops[0] = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* the Get/Restore option macros bracket the destroy/create so the old block's options reach the new one;
     NOTE(review): exact set of options carried over is defined by the macros, which are not visible here */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* the off-diagonal block is created with the full global column count, or with zero columns on one process */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatResetPreallocation_MPIAIJ - Resets the preallocation of both local blocks of B.

  An assembled (or previously assembled) matrix is first disassembled; otherwise the column map,
  garray and local work vector are released directly. The scatter is destroyed in both cases and
  B is left preallocated but unassembled.
*/
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  else {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&b->colmap));
#else
    PetscCall(PetscFree(b->colmap));
#endif
    PetscCall(PetscFree(b->garray));
    PetscCall(VecDestroy(&b->lvec));
  }
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDuplicate_MPIAIJ - Creates a new matrix of the same type, sizes and layouts as matin.

  cpvalues is passed on to MatDuplicate() of the two local blocks. The layouts and the scatter are
  shared by reference; the column map, garray and local work vector are copied or duplicated.
  If matin is still in hash-based assembly mode, the new matrix is only set up and no data is copied.
  *newmat is NULL until the duplicate has been fully built.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  /* per-matrix scratch state is not inherited from the original */
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      /* one extra slot is allocated beyond the len entries that are copied */
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter is shared: take another reference rather than copying it */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatLoad_MPIAIJ - Loads newMat from a viewer, dispatching on the viewer type.

  Binary viewers are handled by MatLoad_MPIAIJ_Binary(); HDF5 viewers by MatLoad_AIJ_HDF5() when the
  build has HDF5. Any other viewer type raises PETSC_ERR_SUP.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_MPIAIJ_Binary - Reads a matrix from a binary viewer into mat.

  The file is read in this order: a four-integer header (class id, rows, columns, nonzero count),
  the row lengths, the column indices, and the values. The row lengths are turned into CSR row
  offsets and the data is handed to MatMPIAIJSetPreallocationCSR().
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M = header[1];
  N = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix sum: rowidxs[i] becomes the offset of local row i, rowidxs[m] the local nonzero count */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* the consistency check is skipped when the header count equals PETSC_INT_MAX */
  if (nz != PETSC_INT_MAX) {
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3102 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3103 /* store matrix indices and values */ 3104 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3105 PetscCall(PetscFree(rowidxs)); 3106 PetscCall(PetscFree2(colidxs, matvals)); 3107 PetscFunctionReturn(PETSC_SUCCESS); 3108 } 3109 3110 /* Not scalable because of ISAllGather() unless getting all columns. */ 3111 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3112 { 3113 IS iscol_local; 3114 PetscBool isstride; 3115 PetscMPIInt lisstride = 0, gisstride; 3116 3117 PetscFunctionBegin; 3118 /* check if we are grabbing all columns*/ 3119 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3120 3121 if (isstride) { 3122 PetscInt start, len, mstart, mlen; 3123 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3124 PetscCall(ISGetLocalSize(iscol, &len)); 3125 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3126 if (mstart == start && mlen - mstart == len) lisstride = 1; 3127 } 3128 3129 PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3130 if (gisstride) { 3131 PetscInt N; 3132 PetscCall(MatGetSize(mat, NULL, &N)); 3133 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3134 PetscCall(ISSetIdentity(iscol_local)); 3135 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3136 } else { 3137 PetscInt cbs; 3138 PetscCall(ISGetBlockSize(iscol, &cbs)); 3139 PetscCall(ISAllGather(iscol, &iscol_local)); 3140 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3141 } 3142 3143 *isseq = iscol_local; 3144 PetscFunctionReturn(PETSC_SUCCESS); 3145 } 3146 3147 /* 3148 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3149 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 
/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat   - matrix
. isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray  - column map; garray[i] indicates global location of iscol_o[i] in `iscol`

  Notes:
  The array returned in garray is allocated here with room for every column of mat->B; this routine does
  not free it.

  Column indices travel through the scatter stored as PetscScalar values, with -1.0 marking unselected columns.
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec x, cmap;
  const PetscInt *is_idx;
  PetscScalar *xarray, *cmaparray;
  PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data;
  Mat B = a->B;
  Vec lvec = a->lvec, lcmap;
  PetscInt i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm comm;
  VecScatter Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* inclusive prefix sum minus the local count gives the offset of this process's first selected column */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */
    idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  /* idx is handed over to the index set (PETSC_OWN_POINTER), so the variable can be reused below */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* entries still at the -1.0 padding value were not selected by iscol on their owning process */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i; /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3229 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3230 3231 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3232 /* off-process column indices */ 3233 count = 0; 3234 PetscCall(PetscMalloc1(Bn, &idx)); 3235 PetscCall(PetscMalloc1(Bn, &cmap1)); 3236 3237 PetscCall(VecGetArray(lvec, &xarray)); 3238 PetscCall(VecGetArray(lcmap, &cmaparray)); 3239 for (i = 0; i < Bn; i++) { 3240 if (PetscRealPart(xarray[i]) > -1.0) { 3241 idx[count] = i; /* local column index in off-diagonal part B */ 3242 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3243 count++; 3244 } 3245 } 3246 PetscCall(VecRestoreArray(lvec, &xarray)); 3247 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3248 3249 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3250 /* cannot ensure iscol_o has same blocksize as iscol! */ 3251 3252 PetscCall(PetscFree(idx)); 3253 *garray = cmap1; 3254 3255 PetscCall(VecDestroy(&x)); 3256 PetscCall(VecDestroy(&cmap)); 3257 PetscCall(VecDestroy(&lcmap)); 3258 PetscFunctionReturn(PETSC_SUCCESS); 3259 } 3260 3261 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3262 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3263 { 3264 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3265 Mat M = NULL; 3266 MPI_Comm comm; 3267 IS iscol_d, isrow_d, iscol_o; 3268 Mat Asub = NULL, Bsub = NULL; 3269 PetscInt n; 3270 3271 PetscFunctionBegin; 3272 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3273 3274 if (call == MAT_REUSE_MATRIX) { 3275 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3277 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3278 3279 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3280 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3281 3282 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3283 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3284 3285 /* Update diagonal and off-diagonal portions of submat */ 3286 asub = (Mat_MPIAIJ *)(*submat)->data; 3287 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3288 PetscCall(ISGetLocalSize(iscol_o, &n)); 3289 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3290 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3291 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3292 3293 } else { /* call == MAT_INITIAL_MATRIX) */ 3294 const PetscInt *garray; 3295 PetscInt BsubN; 3296 3297 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3298 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3299 3300 /* Create local submatrices Asub and Bsub */ 3301 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3302 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3303 3304 /* Create submatrix M */ 3305 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3306 3307 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3308 asub = (Mat_MPIAIJ *)M->data; 3309 3310 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3311 n = asub->B->cmap->N; 3312 if (BsubN > n) { 3313 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3314 const PetscInt *idx; 3315 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3316 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3317 3318 PetscCall(PetscMalloc1(n, &idx_new)); 3319 j = 0; 3320 PetscCall(ISGetIndices(iscol_o, &idx)); 3321 for (i = 0; i < n; i++) { 3322 if (j >= BsubN) break; 3323 while (subgarray[i] > garray[j]) j++; 3324 3325 if (subgarray[i] == garray[j]) { 3326 idx_new[i] = idx[j++]; 3327 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3328 } 3329 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3330 3331 PetscCall(ISDestroy(&iscol_o)); 3332 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3333 3334 } else if (BsubN < n) { 3335 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3336 } 3337 3338 PetscCall(PetscFree(garray)); 3339 *submat = M; 3340 3341 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */ 3342 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3343 PetscCall(ISDestroy(&isrow_d)); 3344 3345 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3346 PetscCall(ISDestroy(&iscol_d)); 3347 3348 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3349 PetscCall(ISDestroy(&iscol_o)); 3350 } 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3355 { 3356 IS iscol_local = NULL, isrow_d; 3357 PetscInt csize; 3358 PetscInt n, i, j, start, end; 3359 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3360 MPI_Comm comm; 3361 3362 PetscFunctionBegin; 3363 /* If isrow has same processor distribution as mat, 3364 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3365 if (call == MAT_REUSE_MATRIX) { 3366 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3367 if (isrow_d) { 3368 sameRowDist = PETSC_TRUE; 3369 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3370 } else { 3371 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3372 if (iscol_local) { 3373 sameRowDist = PETSC_TRUE; 3374 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3375 } 3376 } 3377 } else { 3378 /* Check if isrow has same processor distribution as mat */ 3379 sameDist[0] = PETSC_FALSE; 3380 PetscCall(ISGetLocalSize(isrow, &n)); 3381 if (!n) { 3382 sameDist[0] = PETSC_TRUE; 3383 } else { 3384 PetscCall(ISGetMinMax(isrow, &i, &j)); 3385 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3386 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3387 } 3388 3389 /* Check if iscol has same processor distribution as mat */ 3390 sameDist[1] = PETSC_FALSE; 3391 PetscCall(ISGetLocalSize(iscol, &n)); 3392 if (!n) { 3393 sameDist[1] 
= PETSC_TRUE; 3394 } else { 3395 PetscCall(ISGetMinMax(iscol, &i, &j)); 3396 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3397 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3398 } 3399 3400 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3401 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3402 sameRowDist = tsameDist[0]; 3403 } 3404 3405 if (sameRowDist) { 3406 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3407 /* isrow and iscol have same processor distribution as mat */ 3408 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3409 PetscFunctionReturn(PETSC_SUCCESS); 3410 } else { /* sameRowDist */ 3411 /* isrow has same processor distribution as mat */ 3412 if (call == MAT_INITIAL_MATRIX) { 3413 PetscBool sorted; 3414 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3415 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3416 PetscCall(ISGetSize(iscol, &i)); 3417 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3418 3419 PetscCall(ISSorted(iscol_local, &sorted)); 3420 if (sorted) { 3421 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3422 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3423 PetscFunctionReturn(PETSC_SUCCESS); 3424 } 3425 } else { /* call == MAT_REUSE_MATRIX */ 3426 IS iscol_sub; 3427 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3428 if (iscol_sub) { 3429 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3430 PetscFunctionReturn(PETSC_SUCCESS); 3431 } 3432 } 3433 } 3434 } 3435 3436 /* General case: iscol -> iscol_local which has global size of iscol */ 3437 if (call == MAT_REUSE_MATRIX) { 3438 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3439 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3440 } else { 3441 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3442 } 3443 3444 PetscCall(ISGetLocalSize(iscol, &csize)); 3445 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3446 3447 if (call == MAT_INITIAL_MATRIX) { 3448 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3449 PetscCall(ISDestroy(&iscol_local)); 3450 } 3451 PetscFunctionReturn(PETSC_SUCCESS); 3452 } 3453 3454 /*@C 3455 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3456 and "off-diagonal" part of the matrix in CSR format. 3457 3458 Collective 3459 3460 Input Parameters: 3461 + comm - MPI communicator 3462 . A - "diagonal" portion of matrix 3463 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3464 - garray - global index of `B` columns 3465 3466 Output Parameter: 3467 . mat - the matrix, with input `A` as its local diagonal matrix 3468 3469 Level: advanced 3470 3471 Notes: 3472 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3473 3474 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 
3475 3476 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3477 @*/ 3478 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3479 { 3480 Mat_MPIAIJ *maij; 3481 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3482 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3483 const PetscScalar *oa; 3484 Mat Bnew; 3485 PetscInt m, n, N; 3486 MatType mpi_mat_type; 3487 3488 PetscFunctionBegin; 3489 PetscCall(MatCreate(comm, mat)); 3490 PetscCall(MatGetSize(A, &m, &n)); 3491 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3492 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3493 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3494 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3495 3496 /* Get global columns of mat */ 3497 PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3498 3499 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3500 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3501 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3502 PetscCall(MatSetType(*mat, mpi_mat_type)); 3503 3504 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3505 maij = (Mat_MPIAIJ *)(*mat)->data; 3506 3507 (*mat)->preallocated = PETSC_TRUE; 3508 3509 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3510 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3511 3512 /* Set A as diagonal portion of *mat */ 3513 maij->A = A; 3514 3515 nz = oi[m]; 3516 for (i = 0; i < nz; i++) { 3517 col = oj[i]; 3518 oj[i] = garray[col]; 3519 } 3520 3521 /* Set Bnew as off-diagonal portion of *mat */ 3522 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3523 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3524 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3525 bnew = (Mat_SeqAIJ *)Bnew->data; 3526 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3527 maij->B = Bnew; 3528 3529 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3530 3531 b->free_a = PETSC_FALSE; 3532 b->free_ij = PETSC_FALSE; 3533 PetscCall(MatDestroy(&B)); 3534 3535 bnew->free_a = PETSC_TRUE; 3536 bnew->free_ij = PETSC_TRUE; 3537 3538 /* condense columns of maij->B */ 3539 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3540 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3541 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3542 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3543 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3544 PetscFunctionReturn(PETSC_SUCCESS); 3545 } 3546 3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3548 3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3550 
{
  Mat_MPIAIJ     *mpiaij  = (Mat_MPIAIJ *)mat->data;
  Mat             offdiag = mpiaij->B;
  const PetscInt *garray  = mpiaij->garray;
  const PetscInt  Bn = offdiag->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  Mat             M, Msub;
  Mat_SeqAIJ     *sub;
  MatScalar      *vals;
  const PetscInt *subcmap;
  PetscInt       *rowptr, *colptr, *gcols;
  PetscInt        i, j, m, n, Ncols, nsub, rstart, rend, nlocal, bs, cbs;
  IS              iscol_sub, iscmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Fetch the objects the initial call composed on *newmat and refresh the sequential submatrix */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &nsub));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool identity;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &identity));
    if (identity && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. The scan below requires iscol_local be sorted, it can have duplicate indices */
      const PetscInt *allcols;
      PetscInt       *keptcols, *keptpos, k = 0;

      PetscCall(PetscMalloc1(Ncols, &keptcols));
      PetscCall(PetscMalloc1(Ncols, &keptpos));
      PetscCall(ISGetIndices(iscol_local, &allcols));
      nsub = 0;
      for (i = 0; i < Ncols; i++) {
        const PetscInt col  = allcols[i];
        PetscBool      keep = PETSC_FALSE;

        if (col >= cstart && col < cend) {
          /* diagonal part of mat */
          keep = PETSC_TRUE;
        } else if (Bn) {
          /* off-diagonal part of mat: move k forward (never past the last entry) and look for col in garray */
          while (col > garray[k] && k < Bn - 1) k++;
          if (col == garray[k]) keep = PETSC_TRUE;
        }
        if (keep) {
          keptcols[nsub]  = col;
          keptpos[nsub++] = i; /* column index in submat */
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &allcols));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nsub, keptcols, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), nsub, keptpos, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &nsub));
  sub    = (Mat_SeqAIJ *)Msub->data;
  rowptr = sub->i;
  PetscCall(ISGetIndices(iscmap, &subcmap));

  /*
      m      - number of local rows
      Ncols  - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt     rank, size;
    PetscInt        csize, mglobal, *dlens, *olens;
    const PetscInt *jcol;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Count the nonzeros that land in the diagonal and in the off-diagonal
        portion of the new matrix so that it can be preallocated exactly
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize != PETSC_DECIDE) {
      nlocal = csize;
    } else {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) nlocal = m; /* square matrix */
      else nlocal = Ncols / size + ((Ncols % size) > rank);
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths; dlens and olens share a single allocation */
    jcol = sub->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      const PetscInt rowlen = rowptr[i + 1] - rowptr[i];

      dlens[i] = 0;
      olens[i] = 0;
      for (j = 0; j < rowlen; j++, jcol++) {
        const PetscInt c = subcmap[*jcol];

        if (c >= rstart && c < rend) dlens[i]++;
        else olens[i]++;
      }
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    PetscInt mprev;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &mprev, NULL));
    PetscCheck(mprev == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(nsub, &gcols));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  colptr = sub->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&vals));
  for (i = 0; i < m; i++) {
    const PetscInt grow   = rstart + i;
    const PetscInt rowlen = rowptr[i + 1] - rowptr[i];

    /* translate the column indices of this row of Msub into columns of the new matrix through iscmap */
    for (j = 0; j < rowlen; j++) gcols[j] = subcmap[colptr[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &grow, rowlen, gcols, vals, INSERT_VALUES));
    colptr += rowlen;
    vals += rowlen;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&vals));
  PetscCall(ISRestoreIndices(iscmap, &subcmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(gcols));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
Not great since it makes two copies of the submatrix, first an SeqAIJ 3761 in local and then by concatenating the local matrices the end result. 3762 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3763 3764 This requires a sequential iscol with all indices. 3765 */ 3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3767 { 3768 PetscMPIInt rank, size; 3769 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3770 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3771 Mat M, Mreuse; 3772 MatScalar *aa, *vwork; 3773 MPI_Comm comm; 3774 Mat_SeqAIJ *aij; 3775 PetscBool colflag, allcolumns = PETSC_FALSE; 3776 3777 PetscFunctionBegin; 3778 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3779 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3780 PetscCallMPI(MPI_Comm_size(comm, &size)); 3781 3782 /* Check for special case: each processor gets entire matrix columns */ 3783 PetscCall(ISIdentity(iscol, &colflag)); 3784 PetscCall(ISGetLocalSize(iscol, &n)); 3785 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3786 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3787 3788 if (call == MAT_REUSE_MATRIX) { 3789 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3790 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3791 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3792 } else { 3793 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3794 } 3795 3796 /* 3797 m - number of local rows 3798 n - number of columns (same on all processors) 3799 rstart - first row in new global matrix generated 3800 */ 3801 PetscCall(MatGetSize(Mreuse, 
&m, &n)); 3802 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3803 if (call == MAT_INITIAL_MATRIX) { 3804 aij = (Mat_SeqAIJ *)Mreuse->data; 3805 ii = aij->i; 3806 jj = aij->j; 3807 3808 /* 3809 Determine the number of non-zeros in the diagonal and off-diagonal 3810 portions of the matrix in order to do correct preallocation 3811 */ 3812 3813 /* first get start and end of "diagonal" columns */ 3814 if (csize == PETSC_DECIDE) { 3815 PetscCall(ISGetSize(isrow, &mglobal)); 3816 if (mglobal == n) { /* square matrix */ 3817 nlocal = m; 3818 } else { 3819 nlocal = n / size + ((n % size) > rank); 3820 } 3821 } else { 3822 nlocal = csize; 3823 } 3824 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3825 rstart = rend - nlocal; 3826 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3827 3828 /* next, compute all the lengths */ 3829 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3830 olens = dlens + m; 3831 for (i = 0; i < m; i++) { 3832 jend = ii[i + 1] - ii[i]; 3833 olen = 0; 3834 dlen = 0; 3835 for (j = 0; j < jend; j++) { 3836 if (*jj < rstart || *jj >= rend) olen++; 3837 else dlen++; 3838 jj++; 3839 } 3840 olens[i] = olen; 3841 dlens[i] = dlen; 3842 } 3843 PetscCall(MatCreate(comm, &M)); 3844 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3845 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3846 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3847 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3848 PetscCall(PetscFree(dlens)); 3849 } else { 3850 PetscInt ml, nl; 3851 3852 M = *newmat; 3853 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3854 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3855 PetscCall(MatZeroEntries(M)); 3856 /* 3857 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3858 rather than the 
slower MatSetValues(). 3859 */ 3860 M->was_assembled = PETSC_TRUE; 3861 M->assembled = PETSC_FALSE; 3862 } 3863 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3864 aij = (Mat_SeqAIJ *)Mreuse->data; 3865 ii = aij->i; 3866 jj = aij->j; 3867 3868 /* trigger copy to CPU if needed */ 3869 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3870 for (i = 0; i < m; i++) { 3871 row = rstart + i; 3872 nz = ii[i + 1] - ii[i]; 3873 cwork = jj; 3874 jj = PetscSafePointerPlusOffset(jj, nz); 3875 vwork = aa; 3876 aa = PetscSafePointerPlusOffset(aa, nz); 3877 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3878 } 3879 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3880 3881 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3882 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3883 *newmat = M; 3884 3885 /* save submatrix used in processor for next request */ 3886 if (call == MAT_INITIAL_MATRIX) { 3887 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3888 PetscCall(MatDestroy(&Mreuse)); 3889 } 3890 PetscFunctionReturn(PETSC_SUCCESS); 3891 } 3892 3893 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3894 { 3895 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3896 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3897 const PetscInt *JJ; 3898 PetscBool nooffprocentries; 3899 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3900 3901 PetscFunctionBegin; 3902 PetscCall(PetscLayoutSetUp(B->rmap)); 3903 PetscCall(PetscLayoutSetUp(B->cmap)); 3904 m = B->rmap->n; 3905 cstart = B->cmap->rstart; 3906 cend = B->cmap->rend; 3907 rstart = B->rmap->rstart; 3908 irstart = Ii[0]; 3909 3910 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3911 3912 if (PetscDefined(USE_DEBUG)) { 3913 for (i = 0; i < m; i++) { 3914 nnz = Ii[i + 1] - Ii[i]; 3915 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3916 
PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3917 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3918 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3919 } 3920 } 3921 3922 for (i = 0; i < m; i++) { 3923 nnz = Ii[i + 1] - Ii[i]; 3924 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3925 nnz_max = PetscMax(nnz_max, nnz); 3926 d = 0; 3927 for (j = 0; j < nnz; j++) { 3928 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3929 } 3930 d_nnz[i] = d; 3931 o_nnz[i] = nnz - d; 3932 } 3933 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3934 PetscCall(PetscFree2(d_nnz, o_nnz)); 3935 3936 for (i = 0; i < m; i++) { 3937 ii = i + rstart; 3938 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3939 } 3940 nooffprocentries = B->nooffprocentries; 3941 B->nooffprocentries = PETSC_TRUE; 3942 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3943 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3944 B->nooffprocentries = nooffprocentries; 3945 3946 /* count number of entries below block diagonal */ 3947 PetscCall(PetscFree(Aij->ld)); 3948 PetscCall(PetscCalloc1(m, &ld)); 3949 Aij->ld = ld; 3950 for (i = 0; i < m; i++) { 3951 nnz = Ii[i + 1] - Ii[i]; 3952 j = 0; 3953 while (j < nnz && J[j] < cstart) j++; 3954 ld[i] = j; 3955 if (J) J += nnz; 3956 } 3957 3958 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3959 PetscFunctionReturn(PETSC_SUCCESS); 3960 } 3961 3962 /*@ 3963 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse 
parallel matrix in `MATAIJ` format 3964 (the default parallel PETSc format). 3965 3966 Collective 3967 3968 Input Parameters: 3969 + B - the matrix 3970 . i - the indices into `j` for the start of each local row (indices start with zero) 3971 . j - the column indices for each local row (indices start with zero) 3972 - v - optional values in the matrix 3973 3974 Level: developer 3975 3976 Notes: 3977 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3978 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3979 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3980 3981 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3982 3983 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3984 3985 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3986 3987 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3988 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3989 3990 The format which is used for the sparse matrix input, is equivalent to a 3991 row-major ordering.. 
i.e for the following matrix, the input data expected is 3992 as shown 3993 .vb 3994 1 0 0 3995 2 0 3 P0 3996 ------- 3997 4 5 6 P1 3998 3999 Process0 [P0] rows_owned=[0,1] 4000 i = {0,1,3} [size = nrow+1 = 2+1] 4001 j = {0,0,2} [size = 3] 4002 v = {1,2,3} [size = 3] 4003 4004 Process1 [P1] rows_owned=[2] 4005 i = {0,3} [size = nrow+1 = 1+1] 4006 j = {0,1,2} [size = 3] 4007 v = {4,5,6} [size = 3] 4008 .ve 4009 4010 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4011 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4012 @*/ 4013 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4014 { 4015 PetscFunctionBegin; 4016 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4017 PetscFunctionReturn(PETSC_SUCCESS); 4018 } 4019 4020 /*@ 4021 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4022 (the default parallel PETSc format). For good matrix assembly performance 4023 the user should preallocate the matrix storage by setting the parameters 4024 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4025 4026 Collective 4027 4028 Input Parameters: 4029 + B - the matrix 4030 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4031 (same value is used for all local rows) 4032 . d_nnz - array containing the number of nonzeros in the various rows of the 4033 DIAGONAL portion of the local submatrix (possibly different for each row) 4034 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4035 The size of this array is equal to the number of local rows, i.e 'm'. 
4036 For matrices that will be factored, you must leave room for (and set) 4037 the diagonal entry even if it is zero. 4038 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4039 submatrix (same value is used for all local rows). 4040 - o_nnz - array containing the number of nonzeros in the various rows of the 4041 OFF-DIAGONAL portion of the local submatrix (possibly different for 4042 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4043 structure. The size of this array is equal to the number 4044 of local rows, i.e 'm'. 4045 4046 Example Usage: 4047 Consider the following 8x8 matrix with 34 non-zero values, that is 4048 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4049 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4050 as follows 4051 4052 .vb 4053 1 2 0 | 0 3 0 | 0 4 4054 Proc0 0 5 6 | 7 0 0 | 8 0 4055 9 0 10 | 11 0 0 | 12 0 4056 ------------------------------------- 4057 13 0 14 | 15 16 17 | 0 0 4058 Proc1 0 18 0 | 19 20 21 | 0 0 4059 0 0 0 | 22 23 0 | 24 0 4060 ------------------------------------- 4061 Proc2 25 26 27 | 0 0 28 | 29 0 4062 30 0 0 | 31 32 33 | 0 34 4063 .ve 4064 4065 This can be represented as a collection of submatrices as 4066 .vb 4067 A B C 4068 D E F 4069 G H I 4070 .ve 4071 4072 Where the submatrices A,B,C are owned by proc0, D,E,F are 4073 owned by proc1, G,H,I are owned by proc2. 4074 4075 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4076 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4077 The 'M','N' parameters are 8,8, and have the same values on all procs. 4078 4079 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4080 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4081 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4082 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4083 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4084 matrix, and [DF] as another `MATSEQAIJ` matrix. 4085 4086 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4087 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4088 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4089 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4090 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4091 In this case, the values of `d_nz`, `o_nz` are 4092 .vb 4093 proc0 dnz = 2, o_nz = 2 4094 proc1 dnz = 3, o_nz = 2 4095 proc2 dnz = 1, o_nz = 4 4096 .ve 4097 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4098 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4099 for proc3. i.e we are using 12+15+10=37 storage locations to store 4100 34 values. 4101 4102 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4103 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4104 In the above case the values for `d_nnz`, `o_nnz` are 4105 .vb 4106 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4107 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4108 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4109 .ve 4110 Here the space allocated is sum of all the above values i.e 34, and 4111 hence pre-allocation is perfect. 4112 4113 Level: intermediate 4114 4115 Notes: 4116 If the *_nnz parameter is given then the *_nz parameter is ignored 4117 4118 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4119 storage. The stored row and column indices begin with zero. 4120 See [Sparse Matrices](sec_matsparse) for details. 4121 4122 The parallel matrix is partitioned such that the first m0 rows belong to 4123 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4124 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4125 4126 The DIAGONAL portion of the local submatrix of a processor can be defined 4127 as the submatrix which is obtained by extraction the part corresponding to 4128 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4129 first row that belongs to the processor, r2 is the last row belonging to 4130 the this processor, and c1-c2 is range of indices of the local part of a 4131 vector suitable for applying the matrix to. This is an mxn matrix. In the 4132 common case of a square matrix, the row and column ranges are the same and 4133 the DIAGONAL part is also square. The remaining portion of the local 4134 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4135 4136 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4137 4138 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4139 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4140 You can also run with the option `-info` and look for messages with the string 4141 malloc in them to see if additional memory allocation was needed. 4142 4143 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4144 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4145 @*/ 4146 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4147 { 4148 PetscFunctionBegin; 4149 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4150 PetscValidType(B, 1); 4151 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4152 PetscFunctionReturn(PETSC_SUCCESS); 4153 } 4154 4155 /*@ 4156 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4157 CSR format for the local rows. 
4158 4159 Collective 4160 4161 Input Parameters: 4162 + comm - MPI communicator 4163 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4164 . n - This value should be the same as the local size used in creating the 4165 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4166 calculated if `N` is given) For square matrices n is almost always `m`. 4167 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4168 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4169 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4170 . j - global column indices 4171 - a - optional matrix values 4172 4173 Output Parameter: 4174 . mat - the matrix 4175 4176 Level: intermediate 4177 4178 Notes: 4179 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4180 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4181 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4182 4183 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4184 4185 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4186 4187 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4188 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4189 4190 The format which is used for the sparse matrix input, is equivalent to a 4191 row-major ordering, i.e., for the following matrix, the input data expected is 4192 as shown 4193 .vb 4194 1 0 0 4195 2 0 3 P0 4196 ------- 4197 4 5 6 P1 4198 4199 Process0 [P0] rows_owned=[0,1] 4200 i = {0,1,3} [size = nrow+1 = 2+1] 4201 j = {0,0,2} [size = 3] 4202 v = {1,2,3} [size = 3] 4203 4204 Process1 [P1] rows_owned=[2] 4205 i = {0,3} [size = nrow+1 = 1+1] 4206 j = {0,1,2} [size = 3] 4207 v = {4,5,6} [size = 3] 4208 .ve 4209 4210 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4211 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4212 @*/ 4213 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4214 { 4215 PetscFunctionBegin; 4216 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4217 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4218 PetscCall(MatCreate(comm, mat)); 4219 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4220 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4221 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4222 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4223 PetscFunctionReturn(PETSC_SUCCESS); 4224 } 4225 4226 /*@ 4227 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4228 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4229 from `MatCreateMPIAIJWithArrays()` 4230 4231 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4232 4233 Collective 4234 4235 Input Parameters: 4236 + mat - the matrix 4237 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4238 . n - This value should be the same as the local size used in creating the 4239 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4240 calculated if N is given) For square matrices n is almost always m. 4241 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4242 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4243 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4244 . J - column indices 4245 - v - matrix values 4246 4247 Level: deprecated 4248 4249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4250 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4251 @*/ 4252 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4253 { 4254 PetscInt nnz, i; 4255 PetscBool nooffprocentries; 4256 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4257 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4258 PetscScalar *ad, *ao; 4259 PetscInt ldi, Iii, md; 4260 const PetscInt *Adi = Ad->i; 4261 PetscInt *ld = Aij->ld; 4262 4263 PetscFunctionBegin; 4264 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4265 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4266 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4267 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4268 4269 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4270 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4271 4272 for (i = 0; i < m; i++) { 4273 if (PetscDefined(USE_DEBUG)) { 4274 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4275 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4276 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4277 } 4278 } 4279 nnz = Ii[i + 1] - Ii[i]; 4280 Iii = Ii[i]; 4281 ldi = ld[i]; 4282 md = Adi[i + 1] - Adi[i]; 4283 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4284 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4285 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4286 ad += md; 4287 ao += nnz - md; 4288 } 4289 nooffprocentries = mat->nooffprocentries; 4290 mat->nooffprocentries = PETSC_TRUE; 4291 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4293 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4296 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4297 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4298 mat->nooffprocentries = nooffprocentries; 4299 PetscFunctionReturn(PETSC_SUCCESS); 4300 } 4301 4302 /*@ 4303 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4304 4305 Collective 4306 4307 Input Parameters: 4308 + mat - the matrix 4309 - v - matrix values, stored by row 4310 4311 Level: intermediate 4312 4313 Notes: 4314 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4315 4316 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4317 4318 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4319 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4320 @*/ 4321 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4322 { 4323 PetscInt nnz, i, m; 4324 PetscBool nooffprocentries; 4325 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4326 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4327 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4328 PetscScalar *ad, *ao; 4329 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4330 PetscInt ldi, Iii, md; 4331 PetscInt *ld = Aij->ld; 4332 4333 PetscFunctionBegin; 4334 m = mat->rmap->n; 4335 4336 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4337 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4338 Iii = 0; 4339 for (i = 0; i < m; i++) { 4340 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4341 ldi = ld[i]; 4342 md = Adi[i + 1] - Adi[i]; 4343 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4344 ad += md; 4345 if (ao) { 4346 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4347 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4348 ao += nnz - md; 4349 } 4350 Iii += nnz; 4351 } 4352 nooffprocentries = mat->nooffprocentries; 4353 mat->nooffprocentries = PETSC_TRUE; 4354 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4355 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4356 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4357 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4358 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4359 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4360 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4361 mat->nooffprocentries = nooffprocentries; 4362 PetscFunctionReturn(PETSC_SUCCESS); 4363 } 4364 4365 /*@ 4366 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4367 (the default parallel PETSc format). For good matrix assembly performance 4368 the user should preallocate the matrix storage by setting the parameters 4369 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4370 4371 Collective 4372 4373 Input Parameters: 4374 + comm - MPI communicator 4375 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4376 This value should be the same as the local size used in creating the 4377 y vector for the matrix-vector product y = Ax. 4378 . n - This value should be the same as the local size used in creating the 4379 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4380 calculated if N is given) For square matrices n is almost always m. 4381 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4382 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4383 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4384 (same value is used for all local rows) 4385 . d_nnz - array containing the number of nonzeros in the various rows of the 4386 DIAGONAL portion of the local submatrix (possibly different for each row) 4387 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4388 The size of this array is equal to the number of local rows, i.e 'm'. 4389 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4390 submatrix (same value is used for all local rows). 
4391 - o_nnz - array containing the number of nonzeros in the various rows of the 4392 OFF-DIAGONAL portion of the local submatrix (possibly different for 4393 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4394 structure. The size of this array is equal to the number 4395 of local rows, i.e 'm'. 4396 4397 Output Parameter: 4398 . A - the matrix 4399 4400 Options Database Keys: 4401 + -mat_no_inode - Do not use inodes 4402 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4403 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4404 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4405 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4406 4407 Level: intermediate 4408 4409 Notes: 4410 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4411 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4412 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4413 4414 If the *_nnz parameter is given then the *_nz parameter is ignored 4415 4416 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4417 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4418 storage requirements for this matrix. 4419 4420 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4421 processor than it must be used on all processors that share the object for 4422 that argument. 4423 4424 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4425 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4426 4427 The user MUST specify either the local or global matrix dimensions 4428 (possibly both). 4429 4430 The parallel matrix is partitioned across processors such that the 4431 first `m0` rows belong to process 0, the next `m1` rows belong to 4432 process 1, the next `m2` rows belong to process 2, etc., where 4433 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4434 values corresponding to [m x N] submatrix. 4435 4436 The columns are logically partitioned with the n0 columns belonging 4437 to 0th partition, the next n1 columns belonging to the next 4438 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4439 4440 The DIAGONAL portion of the local submatrix on any given processor 4441 is the submatrix corresponding to the rows and columns m,n 4442 corresponding to the given processor. i.e diagonal matrix on 4443 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4444 etc. The remaining portion of the local submatrix [m x (N-n)] 4445 constitute the OFF-DIAGONAL portion. The example below better 4446 illustrates this concept. The two matrices, the DIAGONAL portion and 4447 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4448 4449 For a square global matrix we define each processor's diagonal portion 4450 to be its local rows and the corresponding columns (a square submatrix); 4451 each processor's off-diagonal portion encompasses the remainder of the 4452 local matrix (a rectangular submatrix). 4453 4454 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4455 4456 When calling this routine with a single process communicator, a matrix of 4457 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4458 type of communicator, use the construction mechanism 4459 .vb 4460 MatCreate(..., &A); 4461 MatSetType(A, MATMPIAIJ); 4462 MatSetSizes(A, m, n, M, N); 4463 MatMPIAIJSetPreallocation(A, ...); 4464 .ve 4465 4466 By default, this format uses inodes (identical nodes) when possible. 4467 We search for consecutive rows with the same nonzero structure, thereby 4468 reusing matrix information to achieve increased efficiency. 4469 4470 Example Usage: 4471 Consider the following 8x8 matrix with 34 non-zero values, that is 4472 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4473 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4474 as follows 4475 4476 .vb 4477 1 2 0 | 0 3 0 | 0 4 4478 Proc0 0 5 6 | 7 0 0 | 8 0 4479 9 0 10 | 11 0 0 | 12 0 4480 ------------------------------------- 4481 13 0 14 | 15 16 17 | 0 0 4482 Proc1 0 18 0 | 19 20 21 | 0 0 4483 0 0 0 | 22 23 0 | 24 0 4484 ------------------------------------- 4485 Proc2 25 26 27 | 0 0 28 | 29 0 4486 30 0 0 | 31 32 33 | 0 34 4487 .ve 4488 4489 This can be represented as a collection of submatrices as 4490 4491 .vb 4492 A B C 4493 D E F 4494 G H I 4495 .ve 4496 4497 Where the submatrices A,B,C are owned by proc0, D,E,F are 4498 owned by proc1, G,H,I are owned by proc2. 4499 4500 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4501 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4502 The 'M','N' parameters are 8,8, and have the same values on all procs. 4503 4504 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4505 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4506 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4507 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4508 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4509 matrix, and [DF] as another SeqAIJ matrix. 

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.
4537 4538 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4539 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4540 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4541 @*/ 4542 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4543 { 4544 PetscMPIInt size; 4545 4546 PetscFunctionBegin; 4547 PetscCall(MatCreate(comm, A)); 4548 PetscCall(MatSetSizes(*A, m, n, M, N)); 4549 PetscCallMPI(MPI_Comm_size(comm, &size)); 4550 if (size > 1) { 4551 PetscCall(MatSetType(*A, MATMPIAIJ)); 4552 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4553 } else { 4554 PetscCall(MatSetType(*A, MATSEQAIJ)); 4555 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4556 } 4557 PetscFunctionReturn(PETSC_SUCCESS); 4558 } 4559 4560 /*MC 4561 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4562 4563 Synopsis: 4564 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4565 4566 Not Collective 4567 4568 Input Parameter: 4569 . A - the `MATMPIAIJ` matrix 4570 4571 Output Parameters: 4572 + Ad - the diagonal portion of the matrix 4573 . Ao - the off-diagonal portion of the matrix 4574 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4575 - ierr - error code 4576 4577 Level: advanced 4578 4579 Note: 4580 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4581 4582 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4583 M*/ 4584 4585 /*MC 4586 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4587 4588 Synopsis: 4589 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4590 4591 Not Collective 4592 4593 Input Parameters: 4594 + A - the `MATMPIAIJ` matrix 4595 . Ad - the diagonal portion of the matrix 4596 . Ao - the off-diagonal portion of the matrix 4597 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4598 - ierr - error code 4599 4600 Level: advanced 4601 4602 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4603 M*/ 4604 4605 /*@C 4606 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4607 4608 Not Collective 4609 4610 Input Parameter: 4611 . A - The `MATMPIAIJ` matrix 4612 4613 Output Parameters: 4614 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4615 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4616 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4617 4618 Level: intermediate 4619 4620 Note: 4621 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4622 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4623 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4624 local column numbers to global column numbers in the original matrix. 4625 4626 Fortran Notes: 4627 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4628 4629 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4630 @*/ 4631 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4632 { 4633 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4634 PetscBool flg; 4635 4636 PetscFunctionBegin; 4637 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4638 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4639 if (Ad) *Ad = a->A; 4640 if (Ao) *Ao = a->B; 4641 if (colmap) *colmap = a->garray; 4642 PetscFunctionReturn(PETSC_SUCCESS); 4643 } 4644 4645 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4646 { 4647 PetscInt m, N, i, rstart, nnz, Ii; 4648 PetscInt *indx; 4649 PetscScalar *values; 4650 MatType rootType; 4651 4652 PetscFunctionBegin; 4653 PetscCall(MatGetSize(inmat, &m, &N)); 4654 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4655 PetscInt *dnz, *onz, sum, bs, cbs; 4656 4657 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4658 /* Check sum(n) = N */ 4659 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4660 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4661 4662 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4663 rstart -= m; 4664 4665 MatPreallocateBegin(comm, m, n, dnz, onz); 4666 for (i = 0; i < m; i++) { 4667 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4668 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4669 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4670 } 4671 4672 PetscCall(MatCreate(comm, outmat)); 4673 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4674 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4675 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4676 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4677 PetscCall(MatSetType(*outmat, rootType)); 4678 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4679 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4680 MatPreallocateEnd(dnz, onz); 4681 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4682 } 4683 4684 /* numeric phase */ 4685 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4686 for (i = 0; i < m; i++) { 4687 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4688 Ii = i + rstart; 4689 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4690 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4691 } 4692 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4693 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4694 PetscFunctionReturn(PETSC_SUCCESS); 4695 } 4696 4697 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4698 { 4699 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4700 4701 PetscFunctionBegin; 4702 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4703 PetscCall(PetscFree(merge->id_r)); 4704 PetscCall(PetscFree(merge->len_s)); 4705 PetscCall(PetscFree(merge->len_r)); 4706 PetscCall(PetscFree(merge->bi)); 4707 PetscCall(PetscFree(merge->bj)); 4708 PetscCall(PetscFree(merge->buf_ri[0])); 4709 PetscCall(PetscFree(merge->buf_ri)); 4710 PetscCall(PetscFree(merge->buf_rj[0])); 4711 PetscCall(PetscFree(merge->buf_rj)); 4712 PetscCall(PetscFree(merge->coi)); 4713 PetscCall(PetscFree(merge->coj)); 4714 PetscCall(PetscFree(merge->owners_co)); 4715 
PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4716 PetscCall(PetscFree(merge)); 4717 PetscFunctionReturn(PETSC_SUCCESS); 4718 } 4719 4720 #include <../src/mat/utils/freespace.h> 4721 #include <petscbt.h> 4722 4723 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4724 { 4725 MPI_Comm comm; 4726 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4727 PetscMPIInt size, rank, taga, *len_s; 4728 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4729 PetscMPIInt proc, k; 4730 PetscInt **buf_ri, **buf_rj; 4731 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4732 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4733 MPI_Request *s_waits, *r_waits; 4734 MPI_Status *status; 4735 const MatScalar *aa, *a_a; 4736 MatScalar **abuf_r, *ba_i; 4737 Mat_Merge_SeqsToMPI *merge; 4738 PetscContainer container; 4739 4740 PetscFunctionBegin; 4741 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4742 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4743 4744 PetscCallMPI(MPI_Comm_size(comm, &size)); 4745 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4746 4747 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4748 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4749 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4750 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4751 aa = a_a; 4752 4753 bi = merge->bi; 4754 bj = merge->bj; 4755 buf_ri = merge->buf_ri; 4756 buf_rj = merge->buf_rj; 4757 4758 PetscCall(PetscMalloc1(size, &status)); 4759 owners = merge->rowmap->range; 4760 len_s = merge->len_s; 4761 4762 /* send and recv matrix values */ 4763 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4764 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4765 4766 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4767 for 
(proc = 0, k = 0; proc < size; proc++) { 4768 if (!len_s[proc]) continue; 4769 i = owners[proc]; 4770 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4771 k++; 4772 } 4773 4774 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4775 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4776 PetscCall(PetscFree(status)); 4777 4778 PetscCall(PetscFree(s_waits)); 4779 PetscCall(PetscFree(r_waits)); 4780 4781 /* insert mat values of mpimat */ 4782 PetscCall(PetscMalloc1(N, &ba_i)); 4783 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4784 4785 for (k = 0; k < merge->nrecv; k++) { 4786 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4787 nrows = *buf_ri_k[k]; 4788 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4789 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4790 } 4791 4792 /* set values of ba */ 4793 m = merge->rowmap->n; 4794 for (i = 0; i < m; i++) { 4795 arow = owners[rank] + i; 4796 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4797 bnzi = bi[i + 1] - bi[i]; 4798 PetscCall(PetscArrayzero(ba_i, bnzi)); 4799 4800 /* add local non-zero vals of this proc's seqmat into ba */ 4801 anzi = ai[arow + 1] - ai[arow]; 4802 aj = a->j + ai[arow]; 4803 aa = a_a + ai[arow]; 4804 nextaj = 0; 4805 for (j = 0; nextaj < anzi; j++) { 4806 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4807 ba_i[j] += aa[nextaj++]; 4808 } 4809 } 4810 4811 /* add received vals into ba */ 4812 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4813 /* i-th row */ 4814 if (i == *nextrow[k]) { 4815 anzi = *(nextai[k] + 1) - *nextai[k]; 4816 aj = buf_rj[k] + *nextai[k]; 4817 aa = abuf_r[k] + *nextai[k]; 4818 nextaj = 0; 4819 for (j = 0; nextaj < anzi; j++) { 4820 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4821 
ba_i[j] += aa[nextaj++]; 4822 } 4823 } 4824 nextrow[k]++; 4825 nextai[k]++; 4826 } 4827 } 4828 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4829 } 4830 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4831 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4832 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4833 4834 PetscCall(PetscFree(abuf_r[0])); 4835 PetscCall(PetscFree(abuf_r)); 4836 PetscCall(PetscFree(ba_i)); 4837 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4838 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4839 PetscFunctionReturn(PETSC_SUCCESS); 4840 } 4841 4842 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4843 { 4844 Mat B_mpi; 4845 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4846 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4847 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4848 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4849 PetscInt len, *dnz, *onz, bs, cbs; 4850 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4851 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4852 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4853 MPI_Status *status; 4854 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4855 PetscBT lnkbt; 4856 Mat_Merge_SeqsToMPI *merge; 4857 PetscContainer container; 4858 4859 PetscFunctionBegin; 4860 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4861 4862 /* make sure it is a PETSc comm */ 4863 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4864 PetscCallMPI(MPI_Comm_size(comm, &size)); 4865 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4866 4867 PetscCall(PetscNew(&merge)); 4868 PetscCall(PetscMalloc1(size, &status)); 4869 4870 /* determine row ownership */ 4871 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4872 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4873 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4874 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4875 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4876 PetscCall(PetscMalloc1(size, &len_si)); 4877 PetscCall(PetscMalloc1(size, &merge->len_s)); 4878 4879 m = merge->rowmap->n; 4880 owners = merge->rowmap->range; 4881 4882 /* determine the number of messages to send, their lengths */ 4883 len_s = merge->len_s; 4884 4885 len = 0; /* length of buf_si[] */ 4886 merge->nsend = 0; 4887 for (PetscMPIInt proc = 0; proc < size; proc++) { 4888 len_si[proc] = 0; 4889 if (proc == rank) { 4890 len_s[proc] = 0; 4891 } else { 4892 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4893 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4894 } 4895 if (len_s[proc]) { 4896 merge->nsend++; 4897 nrows = 0; 4898 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4899 if (ai[i + 1] > ai[i]) nrows++; 4900 } 4901 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4902 len += len_si[proc]; 4903 } 4904 } 4905 4906 /* determine the number and length of messages to receive for ij-structure */ 4907 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4908 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4909 4910 /* post the Irecv of j-structure */ 4911 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4912 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4913 4914 /* post the Isend of j-structure */ 4915 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4916 4917 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4918 if (!len_s[proc]) continue; 4919 i = owners[proc]; 4920 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4921 k++; 4922 } 4923 4924 /* receives 
and sends of j-structure are complete */ 4925 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4926 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4927 4928 /* send and recv i-structure */ 4929 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4930 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4931 4932 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4933 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4934 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4935 if (!len_s[proc]) continue; 4936 /* form outgoing message for i-structure: 4937 buf_si[0]: nrows to be sent 4938 [1:nrows]: row index (global) 4939 [nrows+1:2*nrows+1]: i-structure index 4940 */ 4941 nrows = len_si[proc] / 2 - 1; 4942 buf_si_i = buf_si + nrows + 1; 4943 buf_si[0] = nrows; 4944 buf_si_i[0] = 0; 4945 nrows = 0; 4946 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4947 anzi = ai[i + 1] - ai[i]; 4948 if (anzi) { 4949 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4950 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4951 nrows++; 4952 } 4953 } 4954 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4955 k++; 4956 buf_si += len_si[proc]; 4957 } 4958 4959 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4960 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4961 4962 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4963 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4964 4965 PetscCall(PetscFree(len_si)); 4966 PetscCall(PetscFree(len_ri)); 4967 PetscCall(PetscFree(rj_waits)); 4968 PetscCall(PetscFree2(si_waits, sj_waits)); 4969 PetscCall(PetscFree(ri_waits)); 4970 PetscCall(PetscFree(buf_s)); 4971 
PetscCall(PetscFree(status)); 4972 4973 /* compute a local seq matrix in each processor */ 4974 /* allocate bi array and free space for accumulating nonzero column info */ 4975 PetscCall(PetscMalloc1(m + 1, &bi)); 4976 bi[0] = 0; 4977 4978 /* create and initialize a linked list */ 4979 nlnk = N + 1; 4980 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4981 4982 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4983 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4984 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4985 4986 current_space = free_space; 4987 4988 /* determine symbolic info for each local row */ 4989 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4990 4991 for (k = 0; k < merge->nrecv; k++) { 4992 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4993 nrows = *buf_ri_k[k]; 4994 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4995 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4996 } 4997 4998 MatPreallocateBegin(comm, m, n, dnz, onz); 4999 len = 0; 5000 for (i = 0; i < m; i++) { 5001 bnzi = 0; 5002 /* add local non-zero cols of this proc's seqmat into lnk */ 5003 arow = owners[rank] + i; 5004 anzi = ai[arow + 1] - ai[arow]; 5005 aj = a->j + ai[arow]; 5006 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5007 bnzi += nlnk; 5008 /* add received col data into lnk */ 5009 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5010 if (i == *nextrow[k]) { /* i-th row */ 5011 anzi = *(nextai[k] + 1) - *nextai[k]; 5012 aj = buf_rj[k] + *nextai[k]; 5013 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5014 bnzi += nlnk; 5015 nextrow[k]++; 5016 nextai[k]++; 5017 } 5018 } 5019 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5020 5021 /* if free space is not available, make more free space */ 5022 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5023 /* copy data into free space, then initialize lnk */ 5024 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5025 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5026 5027 current_space->array += bnzi; 5028 current_space->local_used += bnzi; 5029 current_space->local_remaining -= bnzi; 5030 5031 bi[i + 1] = bi[i] + bnzi; 5032 } 5033 5034 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5035 5036 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5037 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5038 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5039 5040 /* create symbolic parallel matrix B_mpi */ 5041 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5042 PetscCall(MatCreate(comm, &B_mpi)); 5043 if (n == PETSC_DECIDE) { 5044 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5045 } else { 5046 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5047 } 5048 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5049 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5050 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5051 MatPreallocateEnd(dnz, onz); 5052 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5053 5054 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5055 B_mpi->assembled = PETSC_FALSE; 5056 merge->bi = bi; 5057 merge->bj = bj; 5058 merge->buf_ri = buf_ri; 5059 merge->buf_rj = buf_rj; 5060 merge->coi = NULL; 5061 merge->coj = NULL; 5062 merge->owners_co = NULL; 5063 5064 PetscCall(PetscCommDestroy(&comm)); 5065 5066 /* attach the supporting struct to B_mpi for reuse */ 5067 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5068 PetscCall(PetscContainerSetPointer(container, merge)); 5069 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5070 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5071 PetscCall(PetscContainerDestroy(&container)); 5072 *mpimat = B_mpi; 5073 5074 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5075 PetscFunctionReturn(PETSC_SUCCESS); 5076 } 5077 5078 /*@ 5079 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5080 matrices from each processor 5081 5082 Collective 5083 5084 Input Parameters: 5085 + comm - the communicators the parallel matrix will live on 5086 . seqmat - the input sequential matrices 5087 . m - number of local rows (or `PETSC_DECIDE`) 5088 . n - number of local columns (or `PETSC_DECIDE`) 5089 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5090 5091 Output Parameter: 5092 . mpimat - the parallel matrix generated 5093 5094 Level: advanced 5095 5096 Note: 5097 The dimensions of the sequential matrix in each processor MUST be the same. 5098 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5099 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5100 5101 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5102 @*/ 5103 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5104 { 5105 PetscMPIInt size; 5106 5107 PetscFunctionBegin; 5108 PetscCallMPI(MPI_Comm_size(comm, &size)); 5109 if (size == 1) { 5110 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5111 if (scall == MAT_INITIAL_MATRIX) { 5112 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5113 } else { 5114 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5115 } 5116 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5117 PetscFunctionReturn(PETSC_SUCCESS); 5118 } 5119 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5120 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5121 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5122 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5123 PetscFunctionReturn(PETSC_SUCCESS); 5124 } 5125 5126 /*@ 5127 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5128 5129 Not Collective 5130 5131 Input Parameter: 5132 . A - the matrix 5133 5134 Output Parameter: 5135 . A_loc - the local sequential matrix generated 5136 5137 Level: developer 5138 5139 Notes: 5140 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5141 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5142 `n` is the global column count obtained with `MatGetSize()` 5143 5144 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5145 5146 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5147 5148 Destroy the matrix with `MatDestroy()` 5149 5150 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5151 @*/ 5152 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5153 { 5154 PetscBool mpi; 5155 5156 PetscFunctionBegin; 5157 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5158 if (mpi) { 5159 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5160 } else { 5161 *A_loc = A; 5162 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5163 } 5164 PetscFunctionReturn(PETSC_SUCCESS); 5165 } 5166 5167 /*@ 5168 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5169 5170 Not Collective 5171 5172 Input Parameters: 5173 + A - the matrix 5174 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5175 5176 Output Parameter: 5177 . A_loc - the local sequential matrix generated 5178 5179 Level: developer 5180 5181 Notes: 5182 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5183 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5184 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5185 5186 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5187 5188 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5189 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5190 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5191 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
5192 5193 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5194 @*/ 5195 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5196 { 5197 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5198 Mat_SeqAIJ *mat, *a, *b; 5199 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5200 const PetscScalar *aa, *ba, *aav, *bav; 5201 PetscScalar *ca, *cam; 5202 PetscMPIInt size; 5203 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5204 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5205 PetscBool match; 5206 5207 PetscFunctionBegin; 5208 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5209 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5210 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5211 if (size == 1) { 5212 if (scall == MAT_INITIAL_MATRIX) { 5213 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5214 *A_loc = mpimat->A; 5215 } else if (scall == MAT_REUSE_MATRIX) { 5216 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5217 } 5218 PetscFunctionReturn(PETSC_SUCCESS); 5219 } 5220 5221 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5222 a = (Mat_SeqAIJ *)mpimat->A->data; 5223 b = (Mat_SeqAIJ *)mpimat->B->data; 5224 ai = a->i; 5225 aj = a->j; 5226 bi = b->i; 5227 bj = b->j; 5228 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5229 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5230 aa = aav; 5231 ba = bav; 5232 if (scall == MAT_INITIAL_MATRIX) { 5233 PetscCall(PetscMalloc1(1 + am, &ci)); 5234 ci[0] = 0; 5235 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5236 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5237 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5238 k = 0; 5239 for (i = 0; i < am; i++) { 5240 ncols_o = bi[i + 1] - bi[i]; 5241 ncols_d = ai[i + 1] - ai[i]; 5242 /* off-diagonal 
portion of A */ 5243 for (jo = 0; jo < ncols_o; jo++) { 5244 col = cmap[*bj]; 5245 if (col >= cstart) break; 5246 cj[k] = col; 5247 bj++; 5248 ca[k++] = *ba++; 5249 } 5250 /* diagonal portion of A */ 5251 for (j = 0; j < ncols_d; j++) { 5252 cj[k] = cstart + *aj++; 5253 ca[k++] = *aa++; 5254 } 5255 /* off-diagonal portion of A */ 5256 for (j = jo; j < ncols_o; j++) { 5257 cj[k] = cmap[*bj++]; 5258 ca[k++] = *ba++; 5259 } 5260 } 5261 /* put together the new matrix */ 5262 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5263 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5264 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5265 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5266 mat->free_a = PETSC_TRUE; 5267 mat->free_ij = PETSC_TRUE; 5268 mat->nonew = 0; 5269 } else if (scall == MAT_REUSE_MATRIX) { 5270 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5271 ci = mat->i; 5272 cj = mat->j; 5273 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5274 for (i = 0; i < am; i++) { 5275 /* off-diagonal portion of A */ 5276 ncols_o = bi[i + 1] - bi[i]; 5277 for (jo = 0; jo < ncols_o; jo++) { 5278 col = cmap[*bj]; 5279 if (col >= cstart) break; 5280 *cam++ = *ba++; 5281 bj++; 5282 } 5283 /* diagonal portion of A */ 5284 ncols_d = ai[i + 1] - ai[i]; 5285 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5286 /* off-diagonal portion of A */ 5287 for (j = jo; j < ncols_o; j++) { 5288 *cam++ = *ba++; 5289 bj++; 5290 } 5291 } 5292 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5293 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5294 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5295 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5296 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5297 PetscFunctionReturn(PETSC_SUCCESS); 5298 } 5299 5300 /*@ 5301 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by 
taking all its local rows and putting them into a sequential matrix with 5302 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5303 5304 Not Collective 5305 5306 Input Parameters: 5307 + A - the matrix 5308 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5309 5310 Output Parameters: 5311 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5312 - A_loc - the local sequential matrix generated 5313 5314 Level: developer 5315 5316 Note: 5317 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5318 part, then those associated with the off-diagonal part (in its local ordering) 5319 5320 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5321 @*/ 5322 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5323 { 5324 Mat Ao, Ad; 5325 const PetscInt *cmap; 5326 PetscMPIInt size; 5327 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5328 5329 PetscFunctionBegin; 5330 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5331 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5332 if (size == 1) { 5333 if (scall == MAT_INITIAL_MATRIX) { 5334 PetscCall(PetscObjectReference((PetscObject)Ad)); 5335 *A_loc = Ad; 5336 } else if (scall == MAT_REUSE_MATRIX) { 5337 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5338 } 5339 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5340 PetscFunctionReturn(PETSC_SUCCESS); 5341 } 5342 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5343 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5344 if (f) { 5345 PetscCall((*f)(A, scall, glob, A_loc)); 5346 } else { 5347 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5348 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5349 Mat_SeqAIJ *c; 5350 PetscInt *ai = a->i, *aj = a->j; 5351 PetscInt *bi = b->i, *bj = b->j; 5352 PetscInt *ci, *cj; 5353 const PetscScalar *aa, *ba; 5354 PetscScalar *ca; 5355 PetscInt i, j, am, dn, on; 5356 5357 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5358 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5359 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5360 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5361 if (scall == MAT_INITIAL_MATRIX) { 5362 PetscInt k; 5363 PetscCall(PetscMalloc1(1 + am, &ci)); 5364 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5365 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5366 ci[0] = 0; 5367 for (i = 0, k = 0; i < am; i++) { 5368 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5369 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5370 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5371 /* diagonal portion of A */ 5372 for (j = 0; j < ncols_d; j++, k++) { 5373 cj[k] = *aj++; 5374 ca[k] = *aa++; 5375 } 5376 /* off-diagonal portion of A */ 5377 for (j = 0; j < ncols_o; j++, k++) { 5378 cj[k] = dn + *bj++; 5379 ca[k] = *ba++; 5380 } 5381 } 5382 /* put together the new matrix */ 5383 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5384 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5385 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5386 c = (Mat_SeqAIJ *)(*A_loc)->data; 5387 c->free_a = PETSC_TRUE; 5388 c->free_ij = PETSC_TRUE; 5389 c->nonew = 0; 5390 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5391 } else if (scall == MAT_REUSE_MATRIX) { 5392 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5393 for (i = 0; i < am; i++) { 5394 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5395 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5396 /* diagonal portion of A */ 5397 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5398 /* off-diagonal portion of A */ 5399 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5400 } 5401 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5402 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5403 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5404 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5405 if (glob) { 5406 PetscInt cst, *gidx; 5407 5408 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5409 PetscCall(PetscMalloc1(dn + on, &gidx)); 5410 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5411 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5412 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5413 } 5414 } 5415 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5416 PetscFunctionReturn(PETSC_SUCCESS); 5417 } 5418 5419 /*@C 5420 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5421 5422 Not Collective 5423 5424 Input Parameters: 5425 + A - the matrix 5426 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5427 . row - index set of rows to extract (or `NULL`) 5428 - col - index set of columns to extract (or `NULL`) 5429 5430 Output Parameter: 5431 . 
A_loc - the local sequential matrix generated 5432 5433 Level: developer 5434 5435 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5436 @*/ 5437 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5438 { 5439 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5440 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5441 IS isrowa, iscola; 5442 Mat *aloc; 5443 PetscBool match; 5444 5445 PetscFunctionBegin; 5446 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5447 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5448 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5449 if (!row) { 5450 start = A->rmap->rstart; 5451 end = A->rmap->rend; 5452 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5453 } else { 5454 isrowa = *row; 5455 } 5456 if (!col) { 5457 start = A->cmap->rstart; 5458 cmap = a->garray; 5459 nzA = a->A->cmap->n; 5460 nzB = a->B->cmap->n; 5461 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5462 ncols = 0; 5463 for (i = 0; i < nzB; i++) { 5464 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5465 else break; 5466 } 5467 imark = i; 5468 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5469 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5470 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5471 } else { 5472 iscola = *col; 5473 } 5474 if (scall != MAT_INITIAL_MATRIX) { 5475 PetscCall(PetscMalloc1(1, &aloc)); 5476 aloc[0] = *A_loc; 5477 } 5478 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5479 if (!col) { /* attach global id of condensed columns */ 5480 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5481 } 5482 *A_loc = aloc[0]; 5483 PetscCall(PetscFree(aloc)); 5484 if (!row) PetscCall(ISDestroy(&isrowa)); 5485 if (!col) 
PetscCall(ISDestroy(&iscola)); 5486 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5487 PetscFunctionReturn(PETSC_SUCCESS); 5488 } 5489 5490 /* 5491 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5492 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5493 * on a global size. 5494 * */ 5495 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5496 { 5497 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5498 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5499 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5500 PetscMPIInt owner; 5501 PetscSFNode *iremote, *oiremote; 5502 const PetscInt *lrowindices; 5503 PetscSF sf, osf; 5504 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5505 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5506 MPI_Comm comm; 5507 ISLocalToGlobalMapping mapping; 5508 const PetscScalar *pd_a, *po_a; 5509 5510 PetscFunctionBegin; 5511 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5512 /* plocalsize is the number of roots 5513 * nrows is the number of leaves 5514 * */ 5515 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5516 PetscCall(ISGetLocalSize(rows, &nrows)); 5517 PetscCall(PetscCalloc1(nrows, &iremote)); 5518 PetscCall(ISGetIndices(rows, &lrowindices)); 5519 for (i = 0; i < nrows; i++) { 5520 /* Find a remote index and an owner for a row 5521 * The row could be local or remote 5522 * */ 5523 owner = 0; 5524 lidx = 0; 5525 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5526 iremote[i].index = lidx; 5527 iremote[i].rank = owner; 5528 } 5529 /* Create SF to communicate how many nonzero columns for each row */ 5530 PetscCall(PetscSFCreate(comm, &sf)); 5531 /* SF will figure out the number of nonzero columns for each row, and their 5532 * offsets 5533 * */ 5534 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5535 PetscCall(PetscSFSetFromOptions(sf)); 5536 PetscCall(PetscSFSetUp(sf)); 5537 5538 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5539 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5540 PetscCall(PetscCalloc1(nrows, &pnnz)); 5541 roffsets[0] = 0; 5542 roffsets[1] = 0; 5543 for (i = 0; i < plocalsize; i++) { 5544 /* diagonal */ 5545 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5546 /* off-diagonal */ 5547 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5548 /* compute offsets so that we relative location for each row */ 5549 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5550 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5551 } 5552 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5553 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5554 /* 'r' means root, and 'l' means leaf */ 5555 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5556 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5557 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5558 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5559 PetscCall(PetscSFDestroy(&sf)); 5560 PetscCall(PetscFree(roffsets)); 5561 PetscCall(PetscFree(nrcols)); 5562 dntotalcols = 0; 5563 ontotalcols = 0; 5564 ncol = 0; 5565 for (i = 0; i < nrows; i++) { 5566 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5567 ncol = PetscMax(pnnz[i], ncol); 5568 /* diagonal */ 5569 dntotalcols += nlcols[i * 2 + 0]; 5570 /* off-diagonal */ 5571 ontotalcols += nlcols[i * 2 + 1]; 5572 } 5573 /* We do not need to figure the right number of columns 5574 * since all the calculations will be done by going through the raw data 5575 * */ 5576 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5577 PetscCall(MatSetUp(*P_oth)); 5578 PetscCall(PetscFree(pnnz)); 5579 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5580 /* diagonal */ 5581 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5582 /* off-diagonal */ 5583 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5584 /* diagonal */ 5585 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5586 /* off-diagonal */ 5587 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5588 dntotalcols = 0; 5589 ontotalcols = 0; 5590 ntotalcols = 0; 5591 for (i = 0; i < nrows; i++) { 5592 owner = 0; 5593 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5594 /* Set iremote for diag matrix */ 5595 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5596 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5597 iremote[dntotalcols].rank = owner; 5598 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5599 ilocal[dntotalcols++] = ntotalcols++; 5600 } 5601 /* off-diagonal */ 5602 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5603 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5604 oiremote[ontotalcols].rank = owner; 5605 oilocal[ontotalcols++] = ntotalcols++; 5606 } 5607 } 5608 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5609 PetscCall(PetscFree(loffsets)); 5610 PetscCall(PetscFree(nlcols)); 5611 PetscCall(PetscSFCreate(comm, &sf)); 5612 /* P serves as roots and P_oth is leaves 5613 * Diag matrix 5614 * */ 5615 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5616 PetscCall(PetscSFSetFromOptions(sf)); 5617 PetscCall(PetscSFSetUp(sf)); 5618 5619 PetscCall(PetscSFCreate(comm, &osf)); 5620 /* off-diagonal */ 5621 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5622 PetscCall(PetscSFSetFromOptions(osf)); 5623 PetscCall(PetscSFSetUp(osf)); 5624 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5625 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5626 /* operate on the matrix internal data to save memory */ 5627 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5628 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5629 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5630 /* Convert to global indices for diag matrix */ 5631 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5632 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5633 /* We want P_oth store global indices */ 5634 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5635 /* Use memory scalable approach */ 5636 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5637 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5638 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5639 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5640 /* Convert back to local indices */ 5641 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5642 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5643 nout = 0; 5644 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5645 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5646 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5647 /* Exchange values */ 5648 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5649 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5650 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5651 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5652 /* Stop PETSc from shrinking memory */ 5653 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5654 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5655 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY)); 5656 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5657 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5658 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5659 PetscCall(PetscSFDestroy(&sf)); 5660 PetscCall(PetscSFDestroy(&osf)); 5661 PetscFunctionReturn(PETSC_SUCCESS); 5662 } 5663 5664 /* 5665 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5666 * This supports MPIAIJ and MAIJ 5667 * */ 5668 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5669 { 5670 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5671 Mat_SeqAIJ *p_oth; 5672 IS rows, map; 5673 PetscHMapI hamp; 5674 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5675 MPI_Comm comm; 5676 PetscSF sf, osf; 5677 PetscBool has; 5678 5679 PetscFunctionBegin; 5680 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5681 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5682 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5683 * and then create a submatrix (that often is an overlapping matrix) 5684 * */ 5685 if (reuse == MAT_INITIAL_MATRIX) { 5686 /* Use a hash table to figure out unique keys */ 5687 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5688 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5689 count = 0; 5690 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5691 for (i = 0; i < a->B->cmap->n; i++) { 5692 key = a->garray[i] / dof; 5693 PetscCall(PetscHMapIHas(hamp, key, &has)); 5694 if (!has) { 5695 mapping[i] = count; 5696 PetscCall(PetscHMapISet(hamp, key, count++)); 5697 } else { 5698 /* Current 'i' has the same value the previous step */ 5699 mapping[i] = count - 1; 5700 } 5701 } 5702 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5703 
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5704 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5705 PetscCall(PetscCalloc1(htsize, &rowindices)); 5706 off = 0; 5707 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5708 PetscCall(PetscHMapIDestroy(&hamp)); 5709 PetscCall(PetscSortInt(htsize, rowindices)); 5710 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5711 /* In case, the matrix was already created but users want to recreate the matrix */ 5712 PetscCall(MatDestroy(P_oth)); 5713 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5714 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5715 PetscCall(ISDestroy(&map)); 5716 PetscCall(ISDestroy(&rows)); 5717 } else if (reuse == MAT_REUSE_MATRIX) { 5718 /* If matrix was already created, we simply update values using SF objects 5719 * that as attached to the matrix earlier. 
5720 */ 5721 const PetscScalar *pd_a, *po_a; 5722 5723 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5724 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5725 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5726 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5727 /* Update values in place */ 5728 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5729 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5730 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5731 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5732 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5733 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5734 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5735 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5736 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5737 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5738 PetscFunctionReturn(PETSC_SUCCESS); 5739 } 5740 5741 /*@C 5742 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5743 5744 Collective 5745 5746 Input Parameters: 5747 + A - the first matrix in `MATMPIAIJ` format 5748 . B - the second matrix in `MATMPIAIJ` format 5749 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5750 5751 Output Parameters: 5752 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5753 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5754 - B_seq - the sequential matrix generated 5755 5756 Level: developer 5757 5758 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5759 @*/ 5760 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5761 { 5762 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5763 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5764 IS isrowb, iscolb; 5765 Mat *bseq = NULL; 5766 5767 PetscFunctionBegin; 5768 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5769 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5770 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5771 5772 if (scall == MAT_INITIAL_MATRIX) { 5773 start = A->cmap->rstart; 5774 cmap = a->garray; 5775 nzA = a->A->cmap->n; 5776 nzB = a->B->cmap->n; 5777 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5778 ncols = 0; 5779 for (i = 0; i < nzB; i++) { /* row < local row index */ 5780 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5781 else break; 5782 } 5783 imark = i; 5784 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5785 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5786 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5787 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5788 } else { 5789 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5790 isrowb = *rowb; 5791 iscolb = *colb; 5792 PetscCall(PetscMalloc1(1, &bseq)); 5793 bseq[0] = *B_seq; 5794 } 5795 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5796 *B_seq = bseq[0]; 5797 PetscCall(PetscFree(bseq)); 5798 if (!rowb) { 5799 
PetscCall(ISDestroy(&isrowb)); 5800 } else { 5801 *rowb = isrowb; 5802 } 5803 if (!colb) { 5804 PetscCall(ISDestroy(&iscolb)); 5805 } else { 5806 *colb = iscolb; 5807 } 5808 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5809 PetscFunctionReturn(PETSC_SUCCESS); 5810 } 5811 5812 /* 5813 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5814 of the OFF-DIAGONAL portion of local A 5815 5816 Collective 5817 5818 Input Parameters: 5819 + A,B - the matrices in `MATMPIAIJ` format 5820 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5821 5822 Output Parameter: 5823 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5824 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5825 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5826 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5827 5828 Developer Note: 5829 This directly accesses information inside the VecScatter associated with the matrix-vector product 5830 for this matrix. This is not desirable.. 
5831 5832 Level: developer 5833 5834 */ 5835 5836 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5837 { 5838 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5839 VecScatter ctx; 5840 MPI_Comm comm; 5841 const PetscMPIInt *rprocs, *sprocs; 5842 PetscMPIInt nrecvs, nsends; 5843 const PetscInt *srow, *rstarts, *sstarts; 5844 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5845 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5846 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5847 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5848 PetscMPIInt size, tag, rank, nreqs; 5849 5850 PetscFunctionBegin; 5851 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5852 PetscCallMPI(MPI_Comm_size(comm, &size)); 5853 5854 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5855 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5856 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5857 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5858 5859 if (size == 1) { 5860 startsj_s = NULL; 5861 bufa_ptr = NULL; 5862 *B_oth = NULL; 5863 PetscFunctionReturn(PETSC_SUCCESS); 5864 } 5865 5866 ctx = a->Mvctx; 5867 tag = ((PetscObject)ctx)->tag; 5868 5869 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5870 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5871 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5872 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5873 PetscCall(PetscMalloc1(nreqs, &reqs)); 5874 rwaits = reqs; 5875 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5876 5877 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5878 if (scall == MAT_INITIAL_MATRIX) { 5879 /* i-array */ 5880 /* post receives */ 5881 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5882 for (i = 0; i < nrecvs; i++) { 5883 rowlen = rvalues + rstarts[i] * rbs; 5884 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5885 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5886 } 5887 5888 /* pack the outgoing message */ 5889 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5890 5891 sstartsj[0] = 0; 5892 rstartsj[0] = 0; 5893 len = 0; /* total length of j or a array to be sent */ 5894 if (nsends) { 5895 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5896 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5897 } 5898 for (i = 0; i < nsends; i++) { 5899 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5900 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5901 for (j = 0; j < nrows; j++) { 5902 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5903 for (l = 0; l < sbs; l++) { 5904 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5905 5906 rowlen[j * sbs + l] = ncols; 5907 5908 len += ncols; 5909 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5910 } 5911 k++; 5912 } 5913 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5914 5915 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5916 } 5917 /* recvs and sends of i-array are completed */ 5918 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5919 
PetscCall(PetscFree(svalues)); 5920 5921 /* allocate buffers for sending j and a arrays */ 5922 PetscCall(PetscMalloc1(len + 1, &bufj)); 5923 PetscCall(PetscMalloc1(len + 1, &bufa)); 5924 5925 /* create i-array of B_oth */ 5926 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5927 5928 b_othi[0] = 0; 5929 len = 0; /* total length of j or a array to be received */ 5930 k = 0; 5931 for (i = 0; i < nrecvs; i++) { 5932 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5933 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5934 for (j = 0; j < nrows; j++) { 5935 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5936 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5937 k++; 5938 } 5939 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5940 } 5941 PetscCall(PetscFree(rvalues)); 5942 5943 /* allocate space for j and a arrays of B_oth */ 5944 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5945 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5946 5947 /* j-array */ 5948 /* post receives of j-array */ 5949 for (i = 0; i < nrecvs; i++) { 5950 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5951 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5952 } 5953 5954 /* pack the outgoing message j-array */ 5955 if (nsends) k = sstarts[0]; 5956 for (i = 0; i < nsends; i++) { 5957 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5958 bufJ = bufj + sstartsj[i]; 5959 for (j = 0; j < nrows; j++) { 5960 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5961 for (ll = 0; ll < sbs; ll++) { 5962 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5963 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5964 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5965 } 5966 } 5967 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5968 } 5969 5970 /* 
recvs and sends of j-array are completed */ 5971 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5972 } else if (scall == MAT_REUSE_MATRIX) { 5973 sstartsj = *startsj_s; 5974 rstartsj = *startsj_r; 5975 bufa = *bufa_ptr; 5976 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5977 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5978 5979 /* a-array */ 5980 /* post receives of a-array */ 5981 for (i = 0; i < nrecvs; i++) { 5982 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5983 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5984 } 5985 5986 /* pack the outgoing message a-array */ 5987 if (nsends) k = sstarts[0]; 5988 for (i = 0; i < nsends; i++) { 5989 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5990 bufA = bufa + sstartsj[i]; 5991 for (j = 0; j < nrows; j++) { 5992 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5993 for (ll = 0; ll < sbs; ll++) { 5994 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5995 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5996 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5997 } 5998 } 5999 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6000 } 6001 /* recvs and sends of a-array are completed */ 6002 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6003 PetscCall(PetscFree(reqs)); 6004 6005 if (scall == MAT_INITIAL_MATRIX) { 6006 Mat_SeqAIJ *b_oth; 6007 6008 /* put together the new matrix */ 6009 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6010 6011 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6012 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6013 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6014 b_oth->free_a = PETSC_TRUE; 6015 b_oth->free_ij = PETSC_TRUE; 6016 b_oth->nonew = 0; 6017 6018 PetscCall(PetscFree(bufj)); 6019 if (!startsj_s || !bufa_ptr) { 6020 PetscCall(PetscFree2(sstartsj, rstartsj)); 6021 PetscCall(PetscFree(bufa_ptr)); 6022 } else { 6023 *startsj_s = sstartsj; 6024 *startsj_r = rstartsj; 6025 *bufa_ptr = bufa; 6026 } 6027 } else if (scall == MAT_REUSE_MATRIX) { 6028 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6029 } 6030 6031 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6032 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6033 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6034 PetscFunctionReturn(PETSC_SUCCESS); 6035 } 6036 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6039 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6040 #if defined(PETSC_HAVE_MKL_SPARSE) 6041 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6042 #endif 6043 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6045 #if defined(PETSC_HAVE_ELEMENTAL) 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_SCALAPACK) 6049 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_HYPRE) 6052 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 #if defined(PETSC_HAVE_CUDA) 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6056 #endif 6057 #if defined(PETSC_HAVE_HIP) 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6059 #endif 6060 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6064 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6065 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6066 6067 /* 6068 Computes (B'*A')' since computing B*A directly is untenable 6069 6070 n p p 6071 [ ] [ ] [ ] 6072 m [ A ] * n [ B ] = m [ C ] 6073 [ ] [ ] [ ] 6074 6075 */ 6076 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6077 { 6078 Mat At, Bt, Ct; 6079 6080 PetscFunctionBegin; 6081 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6082 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6083 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6084 PetscCall(MatDestroy(&At)); 6085 PetscCall(MatDestroy(&Bt)); 6086 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6087 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6088 PetscCall(MatDestroy(&Ct)); 6089 PetscFunctionReturn(PETSC_SUCCESS); 6090 } 6091 6092 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6093 { 6094 PetscBool cisdense; 6095 6096 PetscFunctionBegin; 6097 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6098 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6099 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6100 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6101 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6102 
PetscCall(MatSetUp(C)); 6103 6104 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6105 PetscFunctionReturn(PETSC_SUCCESS); 6106 } 6107 6108 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6109 { 6110 Mat_Product *product = C->product; 6111 Mat A = product->A, B = product->B; 6112 6113 PetscFunctionBegin; 6114 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6115 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6116 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6117 C->ops->productsymbolic = MatProductSymbolic_AB; 6118 PetscFunctionReturn(PETSC_SUCCESS); 6119 } 6120 6121 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6122 { 6123 Mat_Product *product = C->product; 6124 6125 PetscFunctionBegin; 6126 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6127 PetscFunctionReturn(PETSC_SUCCESS); 6128 } 6129 6130 /* 6131 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6132 6133 Input Parameters: 6134 6135 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6136 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6137 6138 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6139 6140 For Set1, j1[] contains column indices of the nonzeros. 6141 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6142 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6143 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6144 6145 Similar for Set2. 
6146 6147 This routine merges the two sets of nonzeros row by row and removes repeats. 6148 6149 Output Parameters: (memory is allocated by the caller) 6150 6151 i[],j[]: the CSR of the merged matrix, which has m rows. 6152 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6153 imap2[]: similar to imap1[], but for Set2. 6154 Note we order nonzeros row-by-row and from left to right. 6155 */ 6156 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6157 { 6158 PetscInt r, m; /* Row index of mat */ 6159 PetscCount t, t1, t2, b1, e1, b2, e2; 6160 6161 PetscFunctionBegin; 6162 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6163 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6164 i[0] = 0; 6165 for (r = 0; r < m; r++) { /* Do row by row merging */ 6166 b1 = rowBegin1[r]; 6167 e1 = rowEnd1[r]; 6168 b2 = rowBegin2[r]; 6169 e2 = rowEnd2[r]; 6170 while (b1 < e1 && b2 < e2) { 6171 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6172 j[t] = j1[b1]; 6173 imap1[t1] = t; 6174 imap2[t2] = t; 6175 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6176 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6177 t1++; 6178 t2++; 6179 t++; 6180 } else if (j1[b1] < j2[b2]) { 6181 j[t] = j1[b1]; 6182 imap1[t1] = t; 6183 b1 += jmap1[t1 + 1] - jmap1[t1]; 6184 t1++; 6185 t++; 6186 } else { 6187 j[t] = j2[b2]; 6188 imap2[t2] = t; 6189 b2 += jmap2[t2 + 1] - jmap2[t2]; 6190 t2++; 6191 t++; 6192 } 6193 } 6194 /* Merge the remaining in either j1[] or j2[] */ 6195 while (b1 < e1) { 6196 j[t] = j1[b1]; 6197 imap1[t1] = t; 6198 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6199 t1++; 6200 t++; 6201 } 6202 while (b2 < e2) { 6203 j[t] = j2[b2]; 6204 imap2[t2] = t; 6205 b2 += jmap2[t2 + 1] - jmap2[t2]; 6206 t2++; 6207 t++; 6208 } 6209 PetscCall(PetscIntCast(t, i + r + 1)); 6210 } 6211 PetscFunctionReturn(PETSC_SUCCESS); 6212 } 6213 6214 /* 6215 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6216 6217 Input Parameters: 6218 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6219 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6220 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6221 6222 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6223 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6224 6225 Output Parameters: 6226 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6227 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6228 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6229 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6230 6231 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6232 Atot: number of entries belonging to the diagonal block. 6233 Annz: number of unique nonzeros belonging to the diagonal block. 6234 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6235 repeats (i.e., same 'i,j' pair). 6236 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6237 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6238 6239 Atot: number of entries belonging to the diagonal block 6240 Annz: number of unique nonzeros belonging to the diagonal block. 6241 6242 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6243 6244 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6245 */ 6246 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6247 { 6248 PetscInt cstart, cend, rstart, rend, row, col; 6249 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6250 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6251 PetscCount k, m, p, q, r, s, mid; 6252 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6253 6254 PetscFunctionBegin; 6255 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6256 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6257 m = rend - rstart; 6258 6259 /* Skip negative rows */ 6260 for (k = 0; k < n; k++) 6261 if (i[k] >= 0) break; 6262 6263 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6264 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6265 */ 6266 while (k < n) { 6267 row = i[k]; 6268 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6269 for (s = k; s < n; s++) 6270 if (i[s] != row) break; 6271 6272 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6273 for (p = k; p < s; p++) { 6274 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6275 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6276 } 6277 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6278 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6279 rowBegin[row - rstart] = k; 6280 rowMid[row - rstart] = mid; 6281 rowEnd[row - rstart] = s; 6282 6283 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6284 Atot += mid - k; 6285 Btot += s - mid; 6286 6287 /* Count unique nonzeros of this diag row */ 6288 for (p = k; p < mid;) { 6289 col = j[p]; 6290 do { 6291 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6292 p++; 6293 } while (p < mid && j[p] == col); 6294 Annz++; 6295 } 6296 6297 /* Count unique nonzeros of this offdiag row */ 6298 for (p = mid; p < s;) { 6299 col = j[p]; 6300 do { 6301 p++; 6302 } while (p < s && j[p] == col); 6303 Bnnz++; 6304 } 6305 k = s; 6306 } 6307 6308 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6309 PetscCall(PetscMalloc1(Atot, &Aperm)); 6310 PetscCall(PetscMalloc1(Btot, &Bperm)); 6311 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6312 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6313 6314 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6315 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6316 for (r = 0; r < m; r++) { 6317 k = rowBegin[r]; 6318 mid = rowMid[r]; 6319 s = rowEnd[r]; 6320 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6321 
PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6322 Atot += mid - k; 6323 Btot += s - mid; 6324 6325 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6326 for (p = k; p < mid;) { 6327 col = j[p]; 6328 q = p; 6329 do { 6330 p++; 6331 } while (p < mid && j[p] == col); 6332 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6333 Annz++; 6334 } 6335 6336 for (p = mid; p < s;) { 6337 col = j[p]; 6338 q = p; 6339 do { 6340 p++; 6341 } while (p < s && j[p] == col); 6342 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6343 Bnnz++; 6344 } 6345 } 6346 /* Output */ 6347 *Aperm_ = Aperm; 6348 *Annz_ = Annz; 6349 *Atot_ = Atot; 6350 *Ajmap_ = Ajmap; 6351 *Bperm_ = Bperm; 6352 *Bnnz_ = Bnnz; 6353 *Btot_ = Btot; 6354 *Bjmap_ = Bjmap; 6355 PetscFunctionReturn(PETSC_SUCCESS); 6356 } 6357 6358 /* 6359 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6360 6361 Input Parameters: 6362 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6363 nnz: number of unique nonzeros in the merged matrix 6364 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6365 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6366 6367 Output Parameter: (memory is allocated by the caller) 6368 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6369 6370 Example: 6371 nnz1 = 4 6372 nnz = 6 6373 imap = [1,3,4,5] 6374 jmap = [0,3,5,6,7] 6375 then, 6376 jmap_new = [0,0,3,3,5,6,7] 6377 */ 6378 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6379 { 6380 PetscCount k, p; 6381 6382 PetscFunctionBegin; 6383 jmap_new[0] = 0; 6384 p = nnz; /* p loops over jmap_new[] backwards */ 6385 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6386 for (; p > 
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6387 } 6388 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6389 PetscFunctionReturn(PETSC_SUCCESS); 6390 } 6391 6392 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6393 { 6394 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6395 6396 PetscFunctionBegin; 6397 PetscCall(PetscSFDestroy(&coo->sf)); 6398 PetscCall(PetscFree(coo->Aperm1)); 6399 PetscCall(PetscFree(coo->Bperm1)); 6400 PetscCall(PetscFree(coo->Ajmap1)); 6401 PetscCall(PetscFree(coo->Bjmap1)); 6402 PetscCall(PetscFree(coo->Aimap2)); 6403 PetscCall(PetscFree(coo->Bimap2)); 6404 PetscCall(PetscFree(coo->Aperm2)); 6405 PetscCall(PetscFree(coo->Bperm2)); 6406 PetscCall(PetscFree(coo->Ajmap2)); 6407 PetscCall(PetscFree(coo->Bjmap2)); 6408 PetscCall(PetscFree(coo->Cperm1)); 6409 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6410 PetscCall(PetscFree(coo)); 6411 PetscFunctionReturn(PETSC_SUCCESS); 6412 } 6413 6414 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6415 { 6416 MPI_Comm comm; 6417 PetscMPIInt rank, size; 6418 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6419 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6420 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6421 PetscContainer container; 6422 MatCOOStruct_MPIAIJ *coo; 6423 6424 PetscFunctionBegin; 6425 PetscCall(PetscFree(mpiaij->garray)); 6426 PetscCall(VecDestroy(&mpiaij->lvec)); 6427 #if defined(PETSC_USE_CTABLE) 6428 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6429 #else 6430 PetscCall(PetscFree(mpiaij->colmap)); 6431 #endif 6432 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6433 mat->assembled = PETSC_FALSE; 6434 mat->was_assembled = PETSC_FALSE; 6435 6436 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6437 PetscCallMPI(MPI_Comm_size(comm, &size)); 6438 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6439 PetscCall(PetscLayoutSetUp(mat->rmap)); 
6440 PetscCall(PetscLayoutSetUp(mat->cmap)); 6441 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6442 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6443 PetscCall(MatGetLocalSize(mat, &m, &n)); 6444 PetscCall(MatGetSize(mat, &M, &N)); 6445 6446 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6447 /* entries come first, then local rows, then remote rows. */ 6448 PetscCount n1 = coo_n, *perm1; 6449 PetscInt *i1 = coo_i, *j1 = coo_j; 6450 6451 PetscCall(PetscMalloc1(n1, &perm1)); 6452 for (k = 0; k < n1; k++) perm1[k] = k; 6453 6454 /* Manipulate indices so that entries with negative row or col indices will have smallest 6455 row indices, local entries will have greater but negative row indices, and remote entries 6456 will have positive row indices. 6457 */ 6458 for (k = 0; k < n1; k++) { 6459 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6460 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6461 else { 6462 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6463 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6464 } 6465 } 6466 6467 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6468 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6469 6470 /* Advance k to the first entry we need to take care of */ 6471 for (k = 0; k < n1; k++) 6472 if (i1[k] > PETSC_INT_MIN) break; 6473 PetscCount i1start = k; 6474 6475 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6476 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6477 6478 /* Send remote 
rows to their owner */ 6479 /* Find which rows should be sent to which remote ranks*/ 6480 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6481 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6482 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6483 const PetscInt *ranges; 6484 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6485 6486 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6487 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6488 for (k = rem; k < n1;) { 6489 PetscMPIInt owner; 6490 PetscInt firstRow, lastRow; 6491 6492 /* Locate a row range */ 6493 firstRow = i1[k]; /* first row of this owner */ 6494 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6495 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6496 6497 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6498 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6499 6500 /* All entries in [k,p) belong to this remote owner */ 6501 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6502 PetscMPIInt *sendto2; 6503 PetscInt *nentries2; 6504 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6505 6506 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6507 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6508 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6509 PetscCall(PetscFree2(sendto, nentries2)); 6510 sendto = sendto2; 6511 nentries = nentries2; 6512 maxNsend = maxNsend2; 6513 } 6514 sendto[nsend] = owner; 6515 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6516 nsend++; 6517 k = p; 6518 } 6519 6520 /* Build 1st SF to know offsets on remote to send data */ 6521 PetscSF sf1; 6522 PetscInt nroots = 1, nroots2 = 0; 6523 PetscInt nleaves = nsend, nleaves2 = 0; 6524 PetscInt *offsets; 6525 PetscSFNode *iremote; 6526 6527 PetscCall(PetscSFCreate(comm, &sf1)); 6528 PetscCall(PetscMalloc1(nsend, &iremote)); 6529 PetscCall(PetscMalloc1(nsend, &offsets)); 6530 for (k = 0; k < nsend; k++) { 6531 iremote[k].rank = sendto[k]; 6532 iremote[k].index = 0; 6533 nleaves2 += nentries[k]; 6534 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6535 } 6536 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6537 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6538 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6539 PetscCall(PetscSFDestroy(&sf1)); 6540 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6541 6542 /* Build 2nd SF to send remote COOs to their owner */ 6543 PetscSF sf2; 6544 nroots = nroots2; 6545 nleaves = nleaves2; 6546 PetscCall(PetscSFCreate(comm, &sf2)); 6547 PetscCall(PetscSFSetFromOptions(sf2)); 6548 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6549 p = 0; 6550 for (k = 0; k < nsend; k++) { 6551 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6552 for (q = 0; q < nentries[k]; q++, p++) { 6553 iremote[p].rank = sendto[k]; 6554 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6555 } 6556 } 6557 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6558 6559 /* Send the remote COOs to their owner */ 6560 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6561 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6562 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6563 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6564 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6565 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6566 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6567 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6568 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6569 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6570 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6571 6572 PetscCall(PetscFree(offsets)); 6573 PetscCall(PetscFree2(sendto, nentries)); 6574 6575 /* Sort received COOs by row along with the permutation array */ 6576 for (k = 0; k < n2; k++) perm2[k] = k; 6577 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6578 6579 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6580 PetscCount *Cperm1; 6581 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6582 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6583 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6584 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6585 6586 /* Support for HYPRE matrices, kind of a hack. 6587 Swap min column with diagonal so that diagonal values will go first */ 6588 PetscBool hypre; 6589 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6590 if (hypre) { 6591 PetscInt *minj; 6592 PetscBT hasdiag; 6593 6594 PetscCall(PetscBTCreate(m, &hasdiag)); 6595 PetscCall(PetscMalloc1(m, &minj)); 6596 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6597 for (k = i1start; k < rem; k++) { 6598 if (j1[k] < cstart || j1[k] >= cend) continue; 6599 const PetscInt rindex = i1[k] - rstart; 6600 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6601 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6602 } 6603 for (k = 0; k < n2; k++) { 6604 if (j2[k] < cstart || j2[k] >= cend) continue; 6605 const PetscInt rindex = i2[k] - rstart; 6606 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6607 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6608 } 6609 for (k = i1start; k < rem; k++) { 6610 const PetscInt rindex = i1[k] - rstart; 6611 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6612 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6613 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6614 } 6615 for (k = 0; k < n2; k++) { 6616 const PetscInt rindex = i2[k] - rstart; 6617 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6618 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6619 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6620 } 6621 
PetscCall(PetscBTDestroy(&hasdiag)); 6622 PetscCall(PetscFree(minj)); 6623 } 6624 6625 /* Split local COOs and received COOs into diag/offdiag portions */ 6626 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6627 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6628 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6629 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6630 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6631 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6632 6633 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6634 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6635 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6636 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6637 6638 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6639 PetscInt *Ai, *Bi; 6640 PetscInt *Aj, *Bj; 6641 6642 PetscCall(PetscMalloc1(m + 1, &Ai)); 6643 PetscCall(PetscMalloc1(m + 1, &Bi)); 6644 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6645 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6646 6647 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6648 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6649 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6650 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6651 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6652 6653 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6654 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6655 6656 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6657 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6658 PetscInt Annz = Ai[m]; 6659 PetscInt Bnnz = Bi[m]; 6660 PetscCount *Ajmap1_new, *Bjmap1_new; 6661 6662 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6663 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6664 6665 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6666 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6667 6668 PetscCall(PetscFree(Aimap1)); 6669 PetscCall(PetscFree(Ajmap1)); 6670 PetscCall(PetscFree(Bimap1)); 6671 PetscCall(PetscFree(Bjmap1)); 6672 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6673 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6674 PetscCall(PetscFree(perm1)); 6675 PetscCall(PetscFree3(i2, j2, perm2)); 6676 6677 Ajmap1 = Ajmap1_new; 6678 Bjmap1 = Bjmap1_new; 6679 6680 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6681 if (Annz < Annz1 + Annz2) { 6682 PetscInt *Aj_new; 6683 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6684 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6685 PetscCall(PetscFree(Aj)); 6686 Aj = Aj_new; 6687 } 6688 6689 if (Bnnz < Bnnz1 + Bnnz2) { 6690 PetscInt *Bj_new; 6691 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6692 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6693 PetscCall(PetscFree(Bj)); 6694 Bj = Bj_new; 6695 } 6696 6697 /* Create new submatrices for on-process and off-process coupling */ 6698 PetscScalar *Aa, *Ba; 6699 MatType rtype; 6700 Mat_SeqAIJ *a, *b; 6701 PetscObjectState state; 6702 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6703 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6704 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6705 if (cstart) { 6706 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6707 } 6708 6709 PetscCall(MatGetRootType_Private(mat, &rtype)); 6710 6711 MatSeqXAIJGetOptions_Private(mpiaij->A); 6712 PetscCall(MatDestroy(&mpiaij->A)); 6713 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6714 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6715 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6716 6717 MatSeqXAIJGetOptions_Private(mpiaij->B); 6718 PetscCall(MatDestroy(&mpiaij->B)); 6719 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6720 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6721 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6722 6723 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6724 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6725 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6726 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6727 6728 a = (Mat_SeqAIJ *)mpiaij->A->data; 6729 b = (Mat_SeqAIJ *)mpiaij->B->data; 6730 a->free_a = PETSC_TRUE; 6731 a->free_ij = PETSC_TRUE; 6732 b->free_a = PETSC_TRUE; 6733 b->free_ij = PETSC_TRUE; 6734 a->maxnz = a->nz; 6735 b->maxnz = b->nz; 6736 6737 /* conversion must happen AFTER multiply setup */ 6738 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6739 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6740 PetscCall(VecDestroy(&mpiaij->lvec)); 6741 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6742 6743 // Put the COO struct in a container and then attach that to the matrix 6744 PetscCall(PetscMalloc1(1, &coo)); 6745 coo->n = coo_n; 6746 coo->sf = sf2; 6747 coo->sendlen = nleaves; 6748 coo->recvlen = nroots; 6749 coo->Annz = Annz; 6750 coo->Bnnz = Bnnz; 6751 coo->Annz2 = Annz2; 6752 coo->Bnnz2 = Bnnz2; 6753 coo->Atot1 = Atot1; 6754 coo->Atot2 = Atot2; 6755 coo->Btot1 = Btot1; 6756 coo->Btot2 = Btot2; 6757 coo->Ajmap1 = Ajmap1; 6758 coo->Aperm1 = Aperm1; 6759 coo->Bjmap1 = Bjmap1; 6760 coo->Bperm1 = Bperm1; 6761 coo->Aimap2 = Aimap2; 6762 coo->Ajmap2 = Ajmap2; 6763 coo->Aperm2 = Aperm2; 6764 coo->Bimap2 = Bimap2; 6765 
coo->Bjmap2 = Bjmap2; 6766 coo->Bperm2 = Bperm2; 6767 coo->Cperm1 = Cperm1; 6768 // Allocate in preallocation. If not used, it has zero cost on host 6769 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6770 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6771 PetscCall(PetscContainerSetPointer(container, coo)); 6772 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6773 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6774 PetscCall(PetscContainerDestroy(&container)); 6775 PetscFunctionReturn(PETSC_SUCCESS); 6776 } 6777 6778 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6779 { 6780 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6781 Mat A = mpiaij->A, B = mpiaij->B; 6782 PetscScalar *Aa, *Ba; 6783 PetscScalar *sendbuf, *recvbuf; 6784 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6785 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6786 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6787 const PetscCount *Cperm1; 6788 PetscContainer container; 6789 MatCOOStruct_MPIAIJ *coo; 6790 6791 PetscFunctionBegin; 6792 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6793 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6794 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6795 sendbuf = coo->sendbuf; 6796 recvbuf = coo->recvbuf; 6797 Ajmap1 = coo->Ajmap1; 6798 Ajmap2 = coo->Ajmap2; 6799 Aimap2 = coo->Aimap2; 6800 Bjmap1 = coo->Bjmap1; 6801 Bjmap2 = coo->Bjmap2; 6802 Bimap2 = coo->Bimap2; 6803 Aperm1 = coo->Aperm1; 6804 Aperm2 = coo->Aperm2; 6805 Bperm1 = coo->Bperm1; 6806 Bperm2 = coo->Bperm2; 6807 Cperm1 = coo->Cperm1; 6808 6809 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6810 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6811 6812 /* Pack 
entries to be sent to remote */ 6813 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6814 6815 /* Send remote entries to their owner and overlap the communication with local computation */ 6816 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6817 /* Add local entries to A and B */ 6818 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6819 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6820 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6821 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6822 } 6823 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6824 PetscScalar sum = 0.0; 6825 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6826 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6827 } 6828 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6829 6830 /* Add received remote entries to A and B */ 6831 for (PetscCount i = 0; i < coo->Annz2; i++) { 6832 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6833 } 6834 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6835 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6836 } 6837 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6838 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6839 PetscFunctionReturn(PETSC_SUCCESS); 6840 } 6841 6842 /*MC 6843 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6844 6845 Options Database Keys: 6846 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6847 6848 Level: beginner 6849 6850 Notes: 6851 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6852 in this case the values associated with the rows and columns one passes in are set to zero 6853 in the matrix 6854 6855 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6856 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6857 6858 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6859 M*/ 6860 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6861 { 6862 Mat_MPIAIJ *b; 6863 PetscMPIInt size; 6864 6865 PetscFunctionBegin; 6866 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6867 6868 PetscCall(PetscNew(&b)); 6869 B->data = (void *)b; 6870 B->ops[0] = MatOps_Values; 6871 B->assembled = PETSC_FALSE; 6872 B->insertmode = NOT_SET_VALUES; 6873 b->size = size; 6874 6875 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6876 6877 /* build cache for off array entries formed */ 6878 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6879 6880 b->donotstash = PETSC_FALSE; 6881 b->colmap = NULL; 6882 b->garray = NULL; 6883 b->roworiented = PETSC_TRUE; 6884 6885 /* stuff used for matrix vector multiply */ 6886 b->lvec = NULL; 6887 b->Mvctx = NULL; 6888 6889 /* stuff for MatGetRow() */ 6890 b->rowindices = NULL; 6891 b->rowvalues = NULL; 6892 b->getrowactive = PETSC_FALSE; 6893 6894 /* flexible pointer used in CUSPARSE classes */ 6895 b->spptr = NULL; 6896 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6899 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6907 #if defined(PETSC_HAVE_CUDA) 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6909 #endif 6910 #if defined(PETSC_HAVE_HIP) 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6912 #endif 6913 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6915 #endif 6916 #if defined(PETSC_HAVE_MKL_SPARSE) 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6918 #endif 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6923 #if defined(PETSC_HAVE_ELEMENTAL) 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6925 #endif 6926 #if defined(PETSC_HAVE_SCALAPACK) 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6928 #endif 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6931 #if defined(PETSC_HAVE_HYPRE) 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6934 #endif 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6939 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6940 PetscFunctionReturn(PETSC_SUCCESS); 6941 } 6942 6943 /*@ 6944 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6945 and "off-diagonal" part of the matrix in CSR format. 6946 6947 Collective 6948 6949 Input Parameters: 6950 + comm - MPI communicator 6951 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6952 . 
n - This value should be the same as the local size used in creating the 6953 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6954 calculated if `N` is given) For square matrices `n` is almost always `m`. 6955 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6956 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6957 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6958 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6959 . a - matrix values 6960 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6961 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6962 - oa - matrix values 6963 6964 Output Parameter: 6965 . mat - the matrix 6966 6967 Level: advanced 6968 6969 Notes: 6970 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6971 must free the arrays once the matrix has been destroyed and not before. 6972 6973 The `i` and `j` indices are 0 based 6974 6975 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6976 6977 This sets local rows and cannot be used to set off-processor values. 6978 6979 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6980 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6981 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6982 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6983 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6984 communication if it is known that only local entries will be set. 6985 6986 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6987 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6988 @*/ 6989 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6990 { 6991 Mat_MPIAIJ *maij; 6992 6993 PetscFunctionBegin; 6994 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6995 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6996 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6997 PetscCall(MatCreate(comm, mat)); 6998 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6999 PetscCall(MatSetType(*mat, MATMPIAIJ)); 7000 maij = (Mat_MPIAIJ *)(*mat)->data; 7001 7002 (*mat)->preallocated = PETSC_TRUE; 7003 7004 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7005 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7006 7007 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7008 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7009 7010 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7011 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7012 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7013 PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7014 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7015 PetscFunctionReturn(PETSC_SUCCESS); 7016 } 7017 7018 typedef struct { 7019 Mat *mp; /* intermediate products */ 7020 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7021 PetscInt cp; /* number of intermediate products */ 7022 7023 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7024 PetscInt *startsj_s, *startsj_r; 7025 PetscScalar *bufa; 7026 Mat P_oth; 7027 7028 /* may take advantage of merging product->B */ 7029 Mat Bloc; /* B-local by merging diag and off-diag */ 7030 7031 /* cusparse does not have support to split between symbolic and numeric phases. 7032 When api_user is true, we don't need to update the numerical values 7033 of the temporary storage */ 7034 PetscBool reusesym; 7035 7036 /* support for COO values insertion */ 7037 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7038 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7039 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7040 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7041 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7042 PetscMemType mtype; 7043 7044 /* customization */ 7045 PetscBool abmerge; 7046 PetscBool P_oth_bind; 7047 } MatMatMPIAIJBACKEND; 7048 7049 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7050 { 7051 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7052 PetscInt i; 7053 7054 PetscFunctionBegin; 7055 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7056 PetscCall(PetscFree(mmdata->bufa)); 7057 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7058 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7059 PetscCall(MatDestroy(&mmdata->P_oth)); 7060 PetscCall(MatDestroy(&mmdata->Bloc)); 7061 PetscCall(PetscSFDestroy(&mmdata->sf)); 7062 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7063 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7064 PetscCall(PetscFree(mmdata->own[0])); 7065 PetscCall(PetscFree(mmdata->own)); 7066 PetscCall(PetscFree(mmdata->off[0])); 7067 PetscCall(PetscFree(mmdata->off)); 7068 PetscCall(PetscFree(mmdata)); 7069 PetscFunctionReturn(PETSC_SUCCESS); 7070 } 7071 7072 /* Copy selected n entries with indices in idx[] of A to v[]. 
7073 If idx is NULL, copy the whole data array of A to v[] 7074 */ 7075 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7076 { 7077 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7078 7079 PetscFunctionBegin; 7080 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7081 if (f) { 7082 PetscCall((*f)(A, n, idx, v)); 7083 } else { 7084 const PetscScalar *vv; 7085 7086 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7087 if (n && idx) { 7088 PetscScalar *w = v; 7089 const PetscInt *oi = idx; 7090 PetscInt j; 7091 7092 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7093 } else { 7094 PetscCall(PetscArraycpy(v, vv, n)); 7095 } 7096 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7097 } 7098 PetscFunctionReturn(PETSC_SUCCESS); 7099 } 7100 7101 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7102 { 7103 MatMatMPIAIJBACKEND *mmdata; 7104 PetscInt i, n_d, n_o; 7105 7106 PetscFunctionBegin; 7107 MatCheckProduct(C, 1); 7108 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7109 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7110 if (!mmdata->reusesym) { /* update temporary matrices */ 7111 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7112 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7113 } 7114 mmdata->reusesym = PETSC_FALSE; 7115 7116 for (i = 0; i < mmdata->cp; i++) { 7117 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7118 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7119 } 7120 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7121 PetscInt noff; 7122 7123 
PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7124 if (mmdata->mptmp[i]) continue; 7125 if (noff) { 7126 PetscInt nown; 7127 7128 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7129 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7130 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7131 n_o += noff; 7132 n_d += nown; 7133 } else { 7134 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7135 7136 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7137 n_d += mm->nz; 7138 } 7139 } 7140 if (mmdata->hasoffproc) { /* offprocess insertion */ 7141 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7142 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7143 } 7144 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7145 PetscFunctionReturn(PETSC_SUCCESS); 7146 } 7147 7148 /* Support for Pt * A, A * P, or Pt * A * P */ 7149 #define MAX_NUMBER_INTERMEDIATE 4 7150 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7151 { 7152 Mat_Product *product = C->product; 7153 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7154 Mat_MPIAIJ *a, *p; 7155 MatMatMPIAIJBACKEND *mmdata; 7156 ISLocalToGlobalMapping P_oth_l2g = NULL; 7157 IS glob = NULL; 7158 const char *prefix; 7159 char pprefix[256]; 7160 const PetscInt *globidx, *P_oth_idx; 7161 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7162 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7163 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7164 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7165 /* a base offset; type-2: sparse with a local to global map table */ 7166 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7167 7168 MatProductType ptype; 7169 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7170 PetscMPIInt size; 7171 7172 PetscFunctionBegin; 7173 MatCheckProduct(C, 1); 7174 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7175 ptype = product->type; 7176 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7177 ptype = MATPRODUCT_AB; 7178 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7179 } 7180 switch (ptype) { 7181 case MATPRODUCT_AB: 7182 A = product->A; 7183 P = product->B; 7184 m = A->rmap->n; 7185 n = P->cmap->n; 7186 M = A->rmap->N; 7187 N = P->cmap->N; 7188 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7189 break; 7190 case MATPRODUCT_AtB: 7191 P = product->A; 7192 A = product->B; 7193 m = P->cmap->n; 7194 n = A->cmap->n; 7195 M = P->cmap->N; 7196 N = A->cmap->N; 7197 hasoffproc = PETSC_TRUE; 7198 break; 7199 case MATPRODUCT_PtAP: 7200 A = product->A; 7201 P = product->B; 7202 m = P->cmap->n; 7203 n = P->cmap->n; 7204 M = P->cmap->N; 7205 N = P->cmap->N; 7206 hasoffproc = PETSC_TRUE; 7207 break; 7208 default: 7209 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7210 } 7211 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7212 if (size == 1) hasoffproc = PETSC_FALSE; 7213 7214 /* defaults */ 7215 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7216 mp[i] = NULL; 7217 mptmp[i] = PETSC_FALSE; 7218 rmapt[i] = -1; 7219 cmapt[i] = -1; 7220 rmapa[i] = NULL; 7221 cmapa[i] = NULL; 7222 } 7223 7224 /* customization */ 
7225 PetscCall(PetscNew(&mmdata)); 7226 mmdata->reusesym = product->api_user; 7227 if (ptype == MATPRODUCT_AB) { 7228 if (product->api_user) { 7229 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7230 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7231 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7232 PetscOptionsEnd(); 7233 } else { 7234 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7235 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7236 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7237 PetscOptionsEnd(); 7238 } 7239 } else if (ptype == MATPRODUCT_PtAP) { 7240 if (product->api_user) { 7241 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7242 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7243 PetscOptionsEnd(); 7244 } else { 7245 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7246 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7247 PetscOptionsEnd(); 7248 } 7249 } 7250 a = (Mat_MPIAIJ *)A->data; 7251 p = (Mat_MPIAIJ *)P->data; 7252 PetscCall(MatSetSizes(C, m, n, M, N)); 7253 PetscCall(PetscLayoutSetUp(C->rmap)); 7254 PetscCall(PetscLayoutSetUp(C->cmap)); 7255 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7256 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7257 7258 cp = 0; 7259 switch (ptype) { 7260 case MATPRODUCT_AB: /* A * P */ 7261 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7262 7263 /* A_diag * P_local (merged or not) */ 7264 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7265 /* P is product->B */ 7266 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7267 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7268 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7269 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7270 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7271 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7272 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7273 mp[cp]->product->api_user = product->api_user; 7274 PetscCall(MatProductSetFromOptions(mp[cp])); 7275 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7276 PetscCall(ISGetIndices(glob, &globidx)); 7277 rmapt[cp] = 1; 7278 cmapt[cp] = 2; 7279 cmapa[cp] = globidx; 7280 mptmp[cp] = PETSC_FALSE; 7281 cp++; 7282 } else { /* A_diag * P_diag and A_diag * P_off */ 7283 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7284 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7285 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7286 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7287 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7288 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7289 mp[cp]->product->api_user = product->api_user; 7290 PetscCall(MatProductSetFromOptions(mp[cp])); 7291 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7292 rmapt[cp] = 1; 7293 cmapt[cp] = 1; 7294 mptmp[cp] = PETSC_FALSE; 7295 cp++; 7296 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7297 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7298 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7299 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7300 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7301 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7302 mp[cp]->product->api_user = product->api_user; 7303 PetscCall(MatProductSetFromOptions(mp[cp])); 7304 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7305 rmapt[cp] = 1; 7306 cmapt[cp] = 2; 7307 cmapa[cp] = p->garray; 7308 mptmp[cp] = PETSC_FALSE; 7309 cp++; 7310 } 7311 7312 /* A_off * P_other */ 7313 if (mmdata->P_oth) { 7314 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7315 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7316 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7317 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7318 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7319 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7320 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7321 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7322 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7323 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7324 mp[cp]->product->api_user = product->api_user; 7325 PetscCall(MatProductSetFromOptions(mp[cp])); 7326 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7327 rmapt[cp] = 1; 7328 cmapt[cp] = 2; 7329 cmapa[cp] = P_oth_idx; 7330 mptmp[cp] = PETSC_FALSE; 7331 cp++; 7332 } 7333 break; 7334 7335 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7336 /* A is product->B */ 7337 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7338 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7339 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7340 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7341 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7342 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7343 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7344 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7345 mp[cp]->product->api_user = product->api_user; 7346 PetscCall(MatProductSetFromOptions(mp[cp])); 7347 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7348 PetscCall(ISGetIndices(glob, &globidx)); 7349 rmapt[cp] = 2; 7350 rmapa[cp] = globidx; 7351 cmapt[cp] = 2; 7352 cmapa[cp] = globidx; 7353 mptmp[cp] = PETSC_FALSE; 7354 cp++; 7355 } else { 7356 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7357 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7358 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7359 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7360 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7361 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7362 mp[cp]->product->api_user = product->api_user; 7363 PetscCall(MatProductSetFromOptions(mp[cp])); 7364 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7365 PetscCall(ISGetIndices(glob, &globidx)); 7366 rmapt[cp] = 1; 7367 cmapt[cp] = 2; 7368 cmapa[cp] = globidx; 7369 mptmp[cp] = PETSC_FALSE; 7370 cp++; 7371 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7372 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7373 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7374 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7375 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7376 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7377 mp[cp]->product->api_user = product->api_user; 7378 PetscCall(MatProductSetFromOptions(mp[cp])); 7379 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7380 rmapt[cp] = 2; 7381 rmapa[cp] = p->garray; 7382 cmapt[cp] = 
2; 7383 cmapa[cp] = globidx; 7384 mptmp[cp] = PETSC_FALSE; 7385 cp++; 7386 } 7387 break; 7388 case MATPRODUCT_PtAP: 7389 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7390 /* P is product->B */ 7391 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7392 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7393 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7394 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7395 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7396 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7397 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7398 mp[cp]->product->api_user = product->api_user; 7399 PetscCall(MatProductSetFromOptions(mp[cp])); 7400 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7401 PetscCall(ISGetIndices(glob, &globidx)); 7402 rmapt[cp] = 2; 7403 rmapa[cp] = globidx; 7404 cmapt[cp] = 2; 7405 cmapa[cp] = globidx; 7406 mptmp[cp] = PETSC_FALSE; 7407 cp++; 7408 if (mmdata->P_oth) { 7409 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7410 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7411 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7412 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7413 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7414 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7415 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7416 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7417 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7418 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7419 mp[cp]->product->api_user = product->api_user; 7420 PetscCall(MatProductSetFromOptions(mp[cp])); 7421 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7422 mptmp[cp] = PETSC_TRUE; 
7423 cp++; 7424 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7425 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7426 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7427 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7428 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7429 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7430 mp[cp]->product->api_user = product->api_user; 7431 PetscCall(MatProductSetFromOptions(mp[cp])); 7432 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7433 rmapt[cp] = 2; 7434 rmapa[cp] = globidx; 7435 cmapt[cp] = 2; 7436 cmapa[cp] = P_oth_idx; 7437 mptmp[cp] = PETSC_FALSE; 7438 cp++; 7439 } 7440 break; 7441 default: 7442 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7443 } 7444 /* sanity check */ 7445 if (size > 1) 7446 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7447 7448 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7449 for (i = 0; i < cp; i++) { 7450 mmdata->mp[i] = mp[i]; 7451 mmdata->mptmp[i] = mptmp[i]; 7452 } 7453 mmdata->cp = cp; 7454 C->product->data = mmdata; 7455 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7456 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7457 7458 /* memory type */ 7459 mmdata->mtype = PETSC_MEMTYPE_HOST; 7460 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7461 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7462 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7463 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7464 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7465 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7466 7467 /* prepare coo 
coordinates for values insertion */ 7468 7469 /* count total nonzeros of those intermediate seqaij Mats 7470 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7471 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7472 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7473 */ 7474 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7475 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7476 if (mptmp[cp]) continue; 7477 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7478 const PetscInt *rmap = rmapa[cp]; 7479 const PetscInt mr = mp[cp]->rmap->n; 7480 const PetscInt rs = C->rmap->rstart; 7481 const PetscInt re = C->rmap->rend; 7482 const PetscInt *ii = mm->i; 7483 for (i = 0; i < mr; i++) { 7484 const PetscInt gr = rmap[i]; 7485 const PetscInt nz = ii[i + 1] - ii[i]; 7486 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7487 else ncoo_oown += nz; /* this row is local */ 7488 } 7489 } else ncoo_d += mm->nz; 7490 } 7491 7492 /* 7493 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7494 7495 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7496 7497 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7498 7499 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7500 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7501 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7502 7503 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7504 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7505 */ 7506 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7507 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7508 7509 /* gather (i,j) of nonzeros inserted by remote procs */ 7510 if (hasoffproc) { 7511 PetscSF msf; 7512 PetscInt ncoo2, *coo_i2, *coo_j2; 7513 7514 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7515 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7516 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7517 7518 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7519 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7520 PetscInt *idxoff = mmdata->off[cp]; 7521 PetscInt *idxown = mmdata->own[cp]; 7522 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7523 const PetscInt *rmap = rmapa[cp]; 7524 const PetscInt *cmap = cmapa[cp]; 7525 const PetscInt *ii = mm->i; 7526 PetscInt *coi = coo_i + ncoo_o; 7527 PetscInt *coj = coo_j + ncoo_o; 7528 const PetscInt mr = mp[cp]->rmap->n; 7529 const PetscInt rs = C->rmap->rstart; 7530 const PetscInt re = C->rmap->rend; 7531 const PetscInt cs = C->cmap->rstart; 7532 for (i = 0; i < mr; i++) { 7533 const PetscInt *jj = mm->j + ii[i]; 7534 const PetscInt gr = rmap[i]; 7535 const PetscInt nz = ii[i + 1] - ii[i]; 7536 if (gr < rs || gr >= re) { /* this is an offproc row */ 7537 for (j = ii[i]; j < ii[i + 1]; j++) { 7538 *coi++ = gr; 7539 *idxoff++ = j; 7540 } 7541 if (!cmapt[cp]) { /* already global */ 7542 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7543 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7544 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7545 } else { /* offdiag */ 7546 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7547 } 7548 ncoo_o += nz; 7549 } else { /* this is a local row */ 7550 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7551 } 7552 } 7553 } 7554 mmdata->off[cp + 1] = idxoff; 7555 mmdata->own[cp + 1] = idxown; 7556 } 7557 7558 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7559 PetscInt incoo_o; 7560 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7561 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7562 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7563 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7564 ncoo = ncoo_d + ncoo_oown + ncoo2; 7565 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7566 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7567 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7568 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7569 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7570 PetscCall(PetscFree2(coo_i, coo_j)); 7571 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7572 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7573 coo_i = coo_i2; 7574 coo_j = coo_j2; 7575 } else { /* no offproc values insertion */ 7576 ncoo = ncoo_d; 7577 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7578 7579 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7580 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7581 PetscCall(PetscSFSetUp(mmdata->sf)); 7582 } 7583 mmdata->hasoffproc = hasoffproc; 7584 7585 /* gather (i,j) of nonzeros inserted locally */ 7586 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7587 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7588 PetscInt *coi = coo_i + ncoo_d; 7589 PetscInt *coj = coo_j + ncoo_d; 7590 const PetscInt *jj = 
mm->j; 7591 const PetscInt *ii = mm->i; 7592 const PetscInt *cmap = cmapa[cp]; 7593 const PetscInt *rmap = rmapa[cp]; 7594 const PetscInt mr = mp[cp]->rmap->n; 7595 const PetscInt rs = C->rmap->rstart; 7596 const PetscInt re = C->rmap->rend; 7597 const PetscInt cs = C->cmap->rstart; 7598 7599 if (mptmp[cp]) continue; 7600 if (rmapt[cp] == 1) { /* consecutive rows */ 7601 /* fill coo_i */ 7602 for (i = 0; i < mr; i++) { 7603 const PetscInt gr = i + rs; 7604 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7605 } 7606 /* fill coo_j */ 7607 if (!cmapt[cp]) { /* type-0, already global */ 7608 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7609 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7610 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7611 } else { /* type-2, local to global for sparse columns */ 7612 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7613 } 7614 ncoo_d += mm->nz; 7615 } else if (rmapt[cp] == 2) { /* sparse rows */ 7616 for (i = 0; i < mr; i++) { 7617 const PetscInt *jj = mm->j + ii[i]; 7618 const PetscInt gr = rmap[i]; 7619 const PetscInt nz = ii[i + 1] - ii[i]; 7620 if (gr >= rs && gr < re) { /* local rows */ 7621 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7622 if (!cmapt[cp]) { /* type-0, already global */ 7623 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7624 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7625 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7626 } else { /* type-2, local to global for sparse columns */ 7627 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7628 } 7629 ncoo_d += nz; 7630 } 7631 } 7632 } 7633 } 7634 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7635 PetscCall(ISDestroy(&glob)); 7636 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7637 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7638 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to 
this proc */ 7639 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7640 7641 /* preallocate with COO data */ 7642 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7643 PetscCall(PetscFree2(coo_i, coo_j)); 7644 PetscFunctionReturn(PETSC_SUCCESS); 7645 } 7646 7647 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7648 { 7649 Mat_Product *product = mat->product; 7650 #if defined(PETSC_HAVE_DEVICE) 7651 PetscBool match = PETSC_FALSE; 7652 PetscBool usecpu = PETSC_FALSE; 7653 #else 7654 PetscBool match = PETSC_TRUE; 7655 #endif 7656 7657 PetscFunctionBegin; 7658 MatCheckProduct(mat, 1); 7659 #if defined(PETSC_HAVE_DEVICE) 7660 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7661 if (match) { /* we can always fallback to the CPU if requested */ 7662 switch (product->type) { 7663 case MATPRODUCT_AB: 7664 if (product->api_user) { 7665 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7666 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7667 PetscOptionsEnd(); 7668 } else { 7669 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7670 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7671 PetscOptionsEnd(); 7672 } 7673 break; 7674 case MATPRODUCT_AtB: 7675 if (product->api_user) { 7676 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7677 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7678 PetscOptionsEnd(); 7679 } else { 7680 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, 
"MatProduct_AtB", "Mat"); 7681 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7682 PetscOptionsEnd(); 7683 } 7684 break; 7685 case MATPRODUCT_PtAP: 7686 if (product->api_user) { 7687 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7688 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7689 PetscOptionsEnd(); 7690 } else { 7691 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7692 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7693 PetscOptionsEnd(); 7694 } 7695 break; 7696 default: 7697 break; 7698 } 7699 match = (PetscBool)!usecpu; 7700 } 7701 #endif 7702 if (match) { 7703 switch (product->type) { 7704 case MATPRODUCT_AB: 7705 case MATPRODUCT_AtB: 7706 case MATPRODUCT_PtAP: 7707 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7708 break; 7709 default: 7710 break; 7711 } 7712 } 7713 /* fallback to MPIAIJ ops */ 7714 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7715 PetscFunctionReturn(PETSC_SUCCESS); 7716 } 7717 7718 /* 7719 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7720 7721 n - the number of block indices in cc[] 7722 cc - the block indices (must be large enough to contain the indices) 7723 */ 7724 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7725 { 7726 PetscInt cnt = -1, nidx, j; 7727 const PetscInt *idx; 7728 7729 PetscFunctionBegin; 7730 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7731 if (nidx) { 7732 cnt = 0; 7733 cc[cnt] = idx[0] / bs; 7734 for (j = 1; j < nidx; j++) { 7735 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7736 } 7737 } 7738 
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7739 *n = cnt + 1; 7740 PetscFunctionReturn(PETSC_SUCCESS); 7741 } 7742 7743 /* 7744 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7745 7746 ncollapsed - the number of block indices 7747 collapsed - the block indices (must be large enough to contain the indices) 7748 */ 7749 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7750 { 7751 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7752 7753 PetscFunctionBegin; 7754 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7755 for (i = start + 1; i < start + bs; i++) { 7756 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7757 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7758 cprevtmp = cprev; 7759 cprev = merged; 7760 merged = cprevtmp; 7761 } 7762 *ncollapsed = nprev; 7763 if (collapsed) *collapsed = cprev; 7764 PetscFunctionReturn(PETSC_SUCCESS); 7765 } 7766 7767 /* 7768 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7769 7770 Input Parameter: 7771 . Amat - matrix 7772 - symmetrize - make the result symmetric 7773 + scale - scale with diagonal 7774 7775 Output Parameter: 7776 . 
a_Gmat - output scalar graph >= 0 7777 7778 */ 7779 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7780 { 7781 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7782 MPI_Comm comm; 7783 Mat Gmat; 7784 PetscBool ismpiaij, isseqaij; 7785 Mat a, b, c; 7786 MatType jtype; 7787 7788 PetscFunctionBegin; 7789 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7790 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7791 PetscCall(MatGetSize(Amat, &MM, &NN)); 7792 PetscCall(MatGetBlockSize(Amat, &bs)); 7793 nloc = (Iend - Istart) / bs; 7794 7795 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7796 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7797 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7798 7799 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7800 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7801 implementation */ 7802 if (bs > 1) { 7803 PetscCall(MatGetType(Amat, &jtype)); 7804 PetscCall(MatCreate(comm, &Gmat)); 7805 PetscCall(MatSetType(Gmat, jtype)); 7806 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7807 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7808 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7809 PetscInt *d_nnz, *o_nnz; 7810 MatScalar *aa, val, *AA; 7811 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7812 7813 if (isseqaij) { 7814 a = Amat; 7815 b = NULL; 7816 } else { 7817 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7818 a = d->A; 7819 b = d->B; 7820 } 7821 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7822 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7823 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7824 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7825 const PetscInt *cols1, *cols2; 7826 7827 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7828 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7829 nnz[brow / bs] = nc2 / bs; 7830 if (nc2 % bs) ok = 0; 7831 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7832 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7833 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7834 if (nc1 != nc2) ok = 0; 7835 else { 7836 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7837 if (cols1[jj] != cols2[jj]) ok = 0; 7838 if (cols1[jj] % bs != jj % bs) ok = 0; 7839 } 7840 } 7841 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7842 } 7843 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7844 if (!ok) { 7845 PetscCall(PetscFree2(d_nnz, o_nnz)); 7846 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7847 goto old_bs; 7848 } 7849 } 7850 } 7851 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7852 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7853 PetscCall(PetscFree2(d_nnz, o_nnz)); 7854 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7855 // diag 7856 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7857 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7858 7859 ai = aseq->i; 7860 n = ai[brow + 1] - ai[brow]; 7861 aj = aseq->j + ai[brow]; 7862 for (PetscInt k = 0; k < n; k += bs) { // block columns 7863 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7864 val = 0; 7865 if (index_size == 0) { 7866 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7867 aa = aseq->a + ai[brow + ii] + k; 7868 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7869 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7870 } 7871 } 7872 } else { // use (index,index) value if provided 
7873 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7874 PetscInt ii = index[iii]; 7875 aa = aseq->a + ai[brow + ii] + k; 7876 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7877 PetscInt jj = index[jjj]; 7878 val += PetscAbs(PetscRealPart(aa[jj])); 7879 } 7880 } 7881 } 7882 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7883 AA[k / bs] = val; 7884 } 7885 grow = Istart / bs + brow / bs; 7886 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7887 } 7888 // off-diag 7889 if (ismpiaij) { 7890 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7891 const PetscScalar *vals; 7892 const PetscInt *cols, *garray = aij->garray; 7893 7894 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7895 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7896 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7897 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7898 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7899 AA[k / bs] = 0; 7900 AJ[cidx] = garray[cols[k]] / bs; 7901 } 7902 nc = ncols / bs; 7903 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7904 if (index_size == 0) { 7905 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7906 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7907 for (PetscInt k = 0; k < ncols; k += bs) { 7908 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7909 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7910 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7911 } 7912 } 7913 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7914 } 7915 } else { // use (index,index) value if provided 7916 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7917 PetscInt ii = index[iii]; 7918 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7919 for (PetscInt k = 
0; k < ncols; k += bs) { 7920 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7921 PetscInt jj = index[jjj]; 7922 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7923 } 7924 } 7925 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7926 } 7927 } 7928 grow = Istart / bs + brow / bs; 7929 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7930 } 7931 } 7932 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7933 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7934 PetscCall(PetscFree2(AA, AJ)); 7935 } else { 7936 const PetscScalar *vals; 7937 const PetscInt *idx; 7938 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7939 old_bs: 7940 /* 7941 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7942 */ 7943 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7944 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7945 if (isseqaij) { 7946 PetscInt max_d_nnz; 7947 7948 /* 7949 Determine exact preallocation count for (sequential) scalar matrix 7950 */ 7951 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7952 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7953 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7954 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7955 PetscCall(PetscFree3(w0, w1, w2)); 7956 } else if (ismpiaij) { 7957 Mat Daij, Oaij; 7958 const PetscInt *garray; 7959 PetscInt max_d_nnz; 7960 7961 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7962 /* 7963 Determine exact preallocation count for diagonal block portion of scalar matrix 7964 */ 7965 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7966 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7967 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7968 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, 
&d_nnz[jj], NULL)); 7969 PetscCall(PetscFree3(w0, w1, w2)); 7970 /* 7971 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7972 */ 7973 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7974 o_nnz[jj] = 0; 7975 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7976 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7977 o_nnz[jj] += ncols; 7978 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7979 } 7980 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7981 } 7982 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7983 /* get scalar copy (norms) of matrix */ 7984 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7985 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7986 PetscCall(PetscFree2(d_nnz, o_nnz)); 7987 for (Ii = Istart; Ii < Iend; Ii++) { 7988 PetscInt dest_row = Ii / bs; 7989 7990 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7991 for (jj = 0; jj < ncols; jj++) { 7992 PetscInt dest_col = idx[jj] / bs; 7993 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7994 7995 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7996 } 7997 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7998 } 7999 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8000 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8001 } 8002 } else { 8003 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8004 else { 8005 Gmat = Amat; 8006 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8007 } 8008 if (isseqaij) { 8009 a = Gmat; 8010 b = NULL; 8011 } else { 8012 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8013 a = d->A; 8014 b = d->B; 8015 } 8016 if (filter >= 0 || scale) { 8017 /* take absolute value of each entry */ 8018 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8019 MatInfo info; 8020 PetscScalar *avals; 8021 8022 PetscCall(MatGetInfo(c, 
MAT_LOCAL, &info)); 8023 PetscCall(MatSeqAIJGetArray(c, &avals)); 8024 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8025 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8026 } 8027 } 8028 } 8029 if (symmetrize) { 8030 PetscBool isset, issym; 8031 8032 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8033 if (!isset || !issym) { 8034 Mat matTrans; 8035 8036 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8037 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8038 PetscCall(MatDestroy(&matTrans)); 8039 } 8040 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8041 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8042 if (scale) { 8043 /* scale c for all diagonal values = 1 or -1 */ 8044 Vec diag; 8045 8046 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8047 PetscCall(MatGetDiagonal(Gmat, diag)); 8048 PetscCall(VecReciprocal(diag)); 8049 PetscCall(VecSqrtAbs(diag)); 8050 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8051 PetscCall(VecDestroy(&diag)); 8052 } 8053 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8054 if (filter >= 0) { 8055 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8056 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8057 } 8058 *a_Gmat = Gmat; 8059 PetscFunctionReturn(PETSC_SUCCESS); 8060 } 8061 8062 /* 8063 Special version for direct calls from Fortran 8064 */ 8065 8066 /* Change these macros so can be used in void function */ 8067 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8068 #undef PetscCall 8069 #define PetscCall(...) 
\ 8070 do { \ 8071 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8072 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8073 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8074 return; \ 8075 } \ 8076 } while (0) 8077 8078 #undef SETERRQ 8079 #define SETERRQ(comm, ierr, ...) \ 8080 do { \ 8081 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8082 return; \ 8083 } while (0) 8084 8085 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8086 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8087 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8088 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8089 #else 8090 #endif 8091 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8092 { 8093 Mat mat = *mmat; 8094 PetscInt m = *mm, n = *mn; 8095 InsertMode addv = *maddv; 8096 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8097 PetscScalar value; 8098 8099 MatCheckPreallocated(mat, 1); 8100 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8101 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8102 { 8103 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8104 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8105 PetscBool roworiented = aij->roworiented; 8106 8107 /* Some Variables required in the macro */ 8108 Mat A = aij->A; 8109 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8110 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8111 MatScalar *aa; 8112 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 8113 Mat B = aij->B; 8114 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8115 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8116 MatScalar *ba; 8117 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8118 * cannot use "#if defined" inside a macro. */ 8119 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8120 8121 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8122 PetscInt nonew = a->nonew; 8123 MatScalar *ap1, *ap2; 8124 8125 PetscFunctionBegin; 8126 PetscCall(MatSeqAIJGetArray(A, &aa)); 8127 PetscCall(MatSeqAIJGetArray(B, &ba)); 8128 for (i = 0; i < m; i++) { 8129 if (im[i] < 0) continue; 8130 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8131 if (im[i] >= rstart && im[i] < rend) { 8132 row = im[i] - rstart; 8133 lastcol1 = -1; 8134 rp1 = aj + ai[row]; 8135 ap1 = aa + ai[row]; 8136 rmax1 = aimax[row]; 8137 nrow1 = ailen[row]; 8138 low1 = 0; 8139 high1 = nrow1; 8140 lastcol2 = -1; 8141 rp2 = bj + bi[row]; 8142 ap2 = ba + bi[row]; 8143 rmax2 = bimax[row]; 8144 nrow2 = bilen[row]; 8145 low2 = 0; 8146 high2 = nrow2; 8147 8148 for (j = 0; j < n; j++) { 8149 if (roworiented) value = v[i * n + j]; 8150 else value = v[i + j * m]; 8151 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8152 if (in[j] >= cstart && in[j] < cend) { 8153 col = in[j] - cstart; 8154 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8155 } else if (in[j] < 0) continue; 8156 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8157 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8158 } else { 8159 if 
(mat->was_assembled) { 8160 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8161 #if defined(PETSC_USE_CTABLE) 8162 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8163 col--; 8164 #else 8165 col = aij->colmap[in[j]] - 1; 8166 #endif 8167 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8168 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8169 col = in[j]; 8170 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8171 B = aij->B; 8172 b = (Mat_SeqAIJ *)B->data; 8173 bimax = b->imax; 8174 bi = b->i; 8175 bilen = b->ilen; 8176 bj = b->j; 8177 rp2 = bj + bi[row]; 8178 ap2 = ba + bi[row]; 8179 rmax2 = bimax[row]; 8180 nrow2 = bilen[row]; 8181 low2 = 0; 8182 high2 = nrow2; 8183 bm = aij->B->rmap->n; 8184 ba = b->a; 8185 inserted = PETSC_FALSE; 8186 } 8187 } else col = in[j]; 8188 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8189 } 8190 } 8191 } else if (!aij->donotstash) { 8192 if (roworiented) { 8193 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8194 } else { 8195 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8196 } 8197 } 8198 } 8199 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8200 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8201 } 8202 PetscFunctionReturnVoid(); 8203 } 8204 8205 /* Undefining these here since they were redefined from their original definition above! No 8206 * other PETSc functions should be defined past this point, as it is impossible to recover the 8207 * original definitions */ 8208 #undef PetscCall 8209 #undef SETERRQ 8210